From 810f49cbca28ab2e6be50ffc2a230781e07903c1 Mon Sep 17 00:00:00 2001 From: David Garske Date: Tue, 3 Feb 2026 16:17:42 -0800 Subject: [PATCH 01/11] NXP T2080 Port Refresh --- arch.mk | 7 + hal/nxp_ppc.h | 68 ++++-- hal/nxp_t2080.c | 520 ++++++++++++------------------------------- hal/nxp_t2080.h | 312 ++++++++++++++++++++++++++ hal/nxp_t2080.ld | 4 +- src/boot_ppc_start.S | 159 +++++++++---- 6 files changed, 636 insertions(+), 434 deletions(-) create mode 100644 hal/nxp_t2080.h diff --git a/arch.mk b/arch.mk index 084e1e63bc..0e2446ccbf 100644 --- a/arch.mk +++ b/arch.mk @@ -633,6 +633,13 @@ ifeq ($(ARCH),PPC) CFLAGS+=-fno-builtin-printf endif + # Target-specific CPU flags + ifeq ($(TARGET),nxp_t2080) + CFLAGS+=-mcpu=e6500 -mno-altivec + else ifeq ($(TARGET),nxp_t1024) + CFLAGS+=-mcpu=e5500 + endif + # Prune unused functions and data CFLAGS+=-ffunction-sections -fdata-sections LDFLAGS+=-Wl,--gc-sections diff --git a/hal/nxp_ppc.h b/hal/nxp_ppc.h index b72bad4042..1edd9f698e 100644 --- a/hal/nxp_ppc.h +++ b/hal/nxp_ppc.h @@ -101,7 +101,7 @@ #define USE_LONG_JUMP #elif defined(TARGET_nxp_t2080) - /* NXP T0280 */ + /* NXP T2080 */ #define CORE_E6500 #define CPU_NUMCORES 4 #define CORES_PER_CLUSTER 4 @@ -118,13 +118,16 @@ #define ENABLE_L1_CACHE #define ENABLE_L2_CACHE - #define L2SRAM_ADDR (0xF8F80000UL) /* L2 as SRAM */ - #define L2SRAM_SIZE (256UL * 1024UL) + #define L2SRAM_ADDR (0xF8F00000UL) /* CPC as SRAM (1MB) */ + #define L2SRAM_SIZE (1024UL * 1024UL) #define INITIAL_SRAM_ADDR L2SRAM_ADDR - #define INITIAL_SRAM_LAW_SZ LAW_SIZE_256KB + /* CPC SRAM transactions traverse the CoreNet interconnect, which + * requires a LAW to route them. LAW_TRGT_DDR_1 is used as a routing + * target; the CPC intercepts the transaction before it reaches DDR. 
*/ + #define INITIAL_SRAM_LAW_SZ LAW_SIZE_1MB #define INITIAL_SRAM_LAW_TRGT LAW_TRGT_DDR_1 - #define INITIAL_SRAM_BOOKE_SZ BOOKE_PAGESZ_256K + #define INITIAL_SRAM_BOOKE_SZ BOOKE_PAGESZ_1M #define ENABLE_INTERRUPTS @@ -285,20 +288,23 @@ #define CPC_BASE (CCSRBAR + 0x10000) /* 8.2 CoreNet Platform Cache (CPC) Memory Map */ #define CPCCSR0 (0x000) + #define CPCEWCR0 (0x010) #define CPCSRCR1 (0x100) #define CPCSRCR0 (0x104) + #define CPCERRDIS (0xE44) #define CPCHDBCR0 (0xF00) #define CPCCSR0_CPCE (0x80000000 >> 0) #define CPCCSR0_CPCPE (0x80000000 >> 1) #define CPCCSR0_CPCFI (0x80000000 >> 10) + #define CPCCSR0_CPCFL (0x80000000 >> 20) #define CPCCSR0_CPCLFC (0x80000000 >> 21) - #define CPCCSR0_SRAM_ENABLE (CPCCSR0_CPCE | CPCCSR0_CPCPE) #ifdef CORE_E6500 - #define CPCSRCR0_SRAMSZ_64 (0x1 << 1) /* ways 14-15 */ - #define CPCSRCR0_SRAMSZ_256 (0x3 << 1) /* ways 8-15 */ - #define CPCSRCR0_SRAMSZ_512 (0x4 << 1) /* ways 0-15 */ + /* T2080: 2MB CPC, 16 ways, 128KB per way */ + #define CPCSRCR0_SRAMSZ_256 (0x1 << 1) /* ways 14-15, 256KB */ + #define CPCSRCR0_SRAMSZ_1024 (0x3 << 1) /* ways 8-15, 1MB */ + #define CPCSRCR0_SRAMSZ_2048 (0x4 << 1) /* ways 0-15, 2MB */ #else /* CORE E5500 */ #define CPCSRCR0_SRAMSZ_64 (0x1 << 1) /* ways 6-7 */ #define CPCSRCR0_SRAMSZ_128 (0x2 << 1) /* ways 4-7 */ @@ -483,13 +489,21 @@ #define SPRN_DBSR 0x130 /* Debug Status Register */ #define SPRN_DEC 0x016 /* Decrement Register */ -#define SPRN_TSR 0x3D8 /* Timer Status Register */ -#define SPRN_TCR 0x3DA /* Timer Control Register */ +#ifdef CORE_E6500 + #define SPRN_TSR 0x150 /* Timer Status Register (SPR 336) */ + #define SPRN_TCR 0x154 /* Timer Control Register (SPR 340) */ + #define SPRN_DEAR 0x03D /* Data Exception Address Register (SPR 61) */ + #define SPRN_ESR 0x03E /* Exception Syndrome Register (SPR 62) */ +#else + #define SPRN_TSR 0x3D8 /* Timer Status Register */ + #define SPRN_TCR 0x3DA /* Timer Control Register */ + #define SPRN_DEAR 0x3D5 /* Data Exception Address Register */ + 
#define SPRN_ESR 0x3D4 /* Exception Syndrome Register */ +#endif + #define TCR_WIE 0x08000000 /* Watchdog Interrupt Enable */ #define TCR_DIE 0x04000000 /* Decrement Interrupt Enable */ - -#define SPRN_ESR 0x3D4 /* Exception Syndrome Register */ #define SPRN_MCSR 0x23C /* Machine Check Syndrome Register */ #define SPRN_PVR 0x11F /* Processor Version */ #define SPRN_SVR 0x3FF /* System Version */ @@ -524,6 +538,8 @@ #define SRR0 0x01A /* Save/Restore Register 0 */ #define SRR1 0x01B /* Save/Restore Register 1 */ +#define SPRN_MCSRR0 0x23A /* Machine Check Save/Restore Register 0 */ +#define SPRN_MCSRR1 0x23B /* Machine Check Save/Restore Register 1 */ #define MSR_DS (1<<4) /* Book E Data address space */ #define MSR_IS (1<<5) /* Book E Instruction address space */ @@ -674,6 +690,31 @@ extern void dcache_disable(void); #else /* Assembly version */ +#ifdef CORE_E6500 +/* e6500 has 64-bit MAS registers - must clear upper 32 bits */ +#define set_tlb(tlb, esel, epn, rpn, urpn, perms, winge, ts, tsize, iprot, reg) \ + lis reg, BOOKE_MAS0(tlb, esel, 0)@h; \ + ori reg, reg, BOOKE_MAS0(tlb, esel, 0)@l; \ + mtspr MAS0, reg;\ + lis reg, BOOKE_MAS1(1, iprot, 0, ts, tsize)@h; \ + ori reg, reg, BOOKE_MAS1(1, iprot, 0, ts, tsize)@l; \ + mtspr MAS1, reg; \ + li reg, 0; \ + oris reg, reg, BOOKE_MAS2(epn, winge)@h; \ + ori reg, reg, BOOKE_MAS2(epn, winge)@l; \ + mtspr MAS2, reg; \ + lis reg, BOOKE_MAS3(rpn, 0, perms)@h; \ + ori reg, reg, BOOKE_MAS3(rpn, 0, perms)@l; \ + mtspr MAS3, reg; \ + lis reg, urpn@h; \ + ori reg, reg, urpn@l; \ + mtspr MAS7, reg; \ + isync; \ + msync; \ + tlbwe; \ + isync; +#else +/* e500/e5500 - 32-bit MAS registers */ #define set_tlb(tlb, esel, epn, rpn, urpn, perms, winge, ts, tsize, iprot, reg) \ lis reg, BOOKE_MAS0(tlb, esel, 0)@h; \ ori reg, reg, BOOKE_MAS0(tlb, esel, 0)@l; \ @@ -694,6 +735,7 @@ extern void dcache_disable(void); msync; \ tlbwe; \ isync; +#endif /* CORE_E6500 */ /* readability helpers for assembly to show register versus decimal */ 
#define r0 0 diff --git a/hal/nxp_t2080.c b/hal/nxp_t2080.c index 6cc3159791..e5a2272f78 100644 --- a/hal/nxp_t2080.c +++ b/hal/nxp_t2080.c @@ -23,277 +23,9 @@ #include "printf.h" #include "image.h" /* for RAMFUNCTION */ #include "nxp_ppc.h" +#include "nxp_t2080.h" -/* Tested on T2080E Rev 1.1, e6500 core 2.0, PVR 8040_0120 and SVR 8538_0011 */ - -/* T2080 */ -#define SYS_CLK (600000000) /* 100MHz PLL with 6:1 = 600 MHz */ - -/* T2080 PC16552D Dual UART */ -#define BAUD_RATE 115200 -#define UART_SEL 0 /* select UART 0 or 1 */ - -#define UART_BASE(n) (CCSRBAR + 0x11C500 + (n * 0x1000)) - -#define UART_RBR(n) *((volatile uint8_t*)(UART_BASE(n) + 0)) /* receiver buffer register */ -#define UART_THR(n) *((volatile uint8_t*)(UART_BASE(n) + 0)) /* transmitter holding register */ -#define UART_IER(n) *((volatile uint8_t*)(UART_BASE(n) + 1)) /* interrupt enable register */ -#define UART_IIR(n) *((volatile uint8_t*)(UART_BASE(n) + 2)) /* interrupt ID register */ -#define UART_FCR(n) *((volatile uint8_t*)(UART_BASE(n) + 2)) /* FIFO control register */ -#define UART_LCR(n) *((volatile uint8_t*)(UART_BASE(n) + 3)) /* line control register */ -#define UART_MCR(n) *((volatile uint8_t*)(UART_BASE(n) + 4)) /* modem control register */ -#define UART_LSR(n) *((volatile uint8_t*)(UART_BASE(n) + 5)) /* line status register */ - -/* enabled when UART_LCR_DLAB set */ -#define UART_DLB(n) *((volatile uint8_t*)(UART_BASE(n) + 0)) /* divisor least significant byte register */ -#define UART_DMB(n) *((volatile uint8_t*)(UART_BASE(n) + 1)) /* divisor most significant byte register */ - -#define UART_FCR_TFR (0x04) /* Transmitter FIFO reset */ -#define UART_FCR_RFR (0x02) /* Receiver FIFO reset */ -#define UART_FCR_FEN (0x01) /* FIFO enable */ -#define UART_LCR_DLAB (0x80) /* Divisor latch access bit */ -#define UART_LCR_WLS (0x03) /* Word length select: 8-bits */ -#define UART_LSR_TEMT (0x40) /* Transmitter empty */ -#define UART_LSR_THRE (0x20) /* Transmitter holding register empty */ - - 
-/* T2080 IFC (Integrated Flash Controller) - RM 13.3 */ -#define IFC_BASE (CCSRBAR + 0x00124000) -#define IFC_MAX_BANKS 8 - -#define IFC_CSPR_EXT(n) *((volatile uint32_t*)(IFC_BASE + 0x000C + (n * 0xC))) /* Extended Base Address */ -#define IFC_CSPR(n) *((volatile uint32_t*)(IFC_BASE + 0x0010 + (n * 0xC))) /* Chip-select Property */ -#define IFC_AMASK(n) *((volatile uint32_t*)(IFC_BASE + 0x00A0 + (n * 0xC))) -#define IFC_CSOR(n) *((volatile uint32_t*)(IFC_BASE + 0x0130 + (n * 0xC))) -#define IFC_CSOR_EXT(n) *((volatile uint32_t*)(IFC_BASE + 0x0134 + (n * 0xC))) -#define IFC_FTIM0(n) *((volatile uint32_t*)(IFC_BASE + 0x01C0 + (n * 0x30))) -#define IFC_FTIM1(n) *((volatile uint32_t*)(IFC_BASE + 0x01C4 + (n * 0x30))) -#define IFC_FTIM2(n) *((volatile uint32_t*)(IFC_BASE + 0x01C8 + (n * 0x30))) -#define IFC_FTIM3(n) *((volatile uint32_t*)(IFC_BASE + 0x01CC + (n * 0x30))) - -#define IFC_CSPR_PHYS_ADDR(x) (((uint32_t)x) & 0xFFFF0000) /* Physical base address */ -#define IFC_CSPR_PORT_SIZE_8 0x00000080 /* Port Size 8 */ -#define IFC_CSPR_PORT_SIZE_16 0x00000100 /* Port Size 16 */ -#define IFC_CSPR_WP 0x00000040 /* Write Protect */ -#define IFC_CSPR_MSEL_NOR 0x00000000 /* Mode Select - NOR */ -#define IFC_CSPR_MSEL_NAND 0x00000002 /* Mode Select - NAND */ -#define IFC_CSPR_MSEL_GPCM 0x00000004 /* Mode Select - GPCM (General-purpose chip-select machine) */ -#define IFC_CSPR_V 0x00000001 /* Bank Valid */ - -/* NOR Timings (IFC clocks) */ -#define IFC_FTIM0_NOR_TACSE(n) (((n) & 0x0F) << 28) /* After address hold cycle */ -#define IFC_FTIM0_NOR_TEADC(n) (((n) & 0x3F) << 16) /* External latch address delay cycles */ -#define IFC_FTIM0_NOR_TAVDS(n) (((n) & 0x3F) << 8) /* Delay between CS assertion */ -#define IFC_FTIM0_NOR_TEAHC(n) (((n) & 0x3F) << 0) /* External latch address hold cycles */ -#define IFC_FTIM1_NOR_TACO(n) (((n) & 0xFF) << 24) /* CS assertion to output enable */ -#define IFC_FTIM1_NOR_TRAD(n) (((n) & 0x3F) << 8) /* read access delay */ -#define 
IFC_FTIM1_NOR_TSEQ(n) (((n) & 0x3F) << 0) /* sequential read access delay */ -#define IFC_FTIM2_NOR_TCS(n) (((n) & 0x0F) << 24) /* Chip-select assertion setup time */ -#define IFC_FTIM2_NOR_TCH(n) (((n) & 0x0F) << 18) /* Chip-select hold time */ -#define IFC_FTIM2_NOR_TWPH(n) (((n) & 0x3F) << 10) /* Chip-select hold time */ -#define IFC_FTIM2_NOR_TWP(n) (((n) & 0xFF) << 0) /* Write enable pulse width */ - -/* GPCM Timings (IFC clocks) */ -#define IFC_FTIM0_GPCM_TACSE(n) (((n) & 0x0F) << 28) /* After address hold cycle */ -#define IFC_FTIM0_GPCM_TEADC(n) (((n) & 0x3F) << 16) /* External latch address delay cycles */ -#define IFC_FTIM0_GPCM_TEAHC(n) (((n) & 0x3F) << 0) /* External latch address hold cycles */ -#define IFC_FTIM1_GPCM_TACO(n) (((n) & 0xFF) << 24) /* CS assertion to output enable */ -#define IFC_FTIM1_GPCM_TRAD(n) (((n) & 0x3F) << 8) /* read access delay */ -#define IFC_FTIM2_GPCM_TCS(n) (((n) & 0x0F) << 24) /* Chip-select assertion setup time */ -#define IFC_FTIM2_GPCM_TCH(n) (((n) & 0x0F) << 18) /* Chip-select hold time */ -#define IFC_FTIM2_GPCM_TWP(n) (((n) & 0xFF) << 0) /* Write enable pulse width */ - -/* IFC AMASK - RM Table 13-3 - Count of MSB minus 1 */ -enum ifc_amask_sizes { - IFC_AMASK_64KB = 0xFFFF0000, - IFC_AMASK_128KB = 0xFFFE0000, - IFC_AMASK_256KB = 0xFFFC0000, - IFC_AMASK_512KB = 0xFFF80000, - IFC_AMASK_1MB = 0xFFF00000, - IFC_AMASK_2MB = 0xFFE00000, - IFC_AMASK_4MB = 0xFFC00000, - IFC_AMASK_8MB = 0xFF800000, - IFC_AMASK_16MB = 0xFF000000, - IFC_AMASK_32MB = 0xFE000000, - IFC_AMASK_64MB = 0xFC000000, - IFC_AMASK_128MB = 0xF8000000, - IFC_AMASK_256MB = 0xF0000000, - IFC_AMASK_512MB = 0xE0000000, - IFC_AMASK_1GB = 0xC0000000, - IFC_AMASK_2GB = 0x80000000, - IFC_AMASK_4GB = 0x00000000, -}; - - -/* NOR Flash */ -#define FLASH_BASE 0xE8000000 - -#define FLASH_BANK_SIZE (128*1024*1024) -#define FLASH_PAGE_SIZE (1024) /* program buffer */ -#define FLASH_SECTOR_SIZE (128*1024) -#define FLASH_SECTORS (FLASH_BANK_SIZE / FLASH_SECTOR_SIZE) 
-#define FLASH_CFI_16BIT 0x02 /* word */ -#define FLASH_CFI_WIDTH FLASH_CFI_16BIT - -#define FLASH_ERASE_TOUT 60000 /* Flash Erase Timeout (ms) */ -#define FLASH_WRITE_TOUT 500 /* Flash Write Timeout (ms) */ - - -#if 0 - #define ENABLE_CPLD -#endif -/* CPLD */ -#define CPLD_BASE 0xFFDF0000 -#define CPLD_BASE_PHYS_HIGH 0xFULL - -#define CPLD_SPARE 0x00 -#define CPLD_SATA_MUX_SEL 0x02 -#define CPLD_BANK_SEL 0x04 -#define CPLD_FW_REV 0x06 -#define CPLD_TTL_RW 0x08 -#define CPLD_TTL_LPBK 0x0A -#define CPLD_TTL_DATA 0x0C -#define CPLD_PROC_STATUS 0x0E /* write 1 to enable proc reset function, reset default value is 0 */ -#define CPLD_FPGA_RDY 0x10 /* read only when reg read 0x0DB1 then fpga is ready */ -#define CPLD_PCIE_SW_RESET 0x12 /* write 1 to reset the PCIe switch */ -#define CPLD_WR_TTL_INT_EN 0x14 -#define CPLD_WR_TTL_INT_DIR 0x16 -#define CPLD_INT_STAT 0x18 -#define CPLD_WR_TEMP_ALM_OVRD 0x1A /* write 0 to enable temp shutdown. reset default value is 1 */ -#define CPLD_PWR_DWN_CMD 0x1C -#define CPLD_TEMP_ALM_INT_STAT 0x1E -#define CPLD_WR_TEMP_ALM_INT_EN 0x20 - -#define CPLD_FLASH_BANK_0 0x00 -#define CPLD_FLASH_BANK_1 0x01 - -#define CPLD_DATA(n) *((volatile uint8_t*)(CPLD_BASE + n)) - - -/* SATA */ -#define SATA_ENBL (*(volatile uint32_t *)(0xB1003F4C)) /* also saw 0xB4003F4C */ - -/* DDR */ -/* NAII 68PPC2 - 8GB discrete DDR3 IM8G08D3EBDG-15E */ -/* 1333.333 MT/s data rate 8 GiB (DDR3, 64-bit, CL=9, ECC on) */ -#define DDR_N_RANKS 2 -#define DDR_RANK_DENS 0x100000000 -#define DDR_SDRAM_WIDTH 64 -#define DDR_EC_SDRAM_W 8 -#define DDR_N_ROW_ADDR 16 -#define DDR_N_COL_ADDR 10 -#define DDR_N_BANKS 8 -#define DDR_EDC_CONFIG 2 -#define DDR_BURSTL_MASK 0x0c -#define DDR_TCKMIN_X_PS 1500 -#define DDR_TCMMAX_PS 3000 -#define DDR_CASLAT_X 0x000007E0 -#define DDR_TAA_PS 13500 -#define DDR_TRCD_PS 13500 -#define DDR_TRP_PS 13500 -#define DDR_TRAS_PS 36000 -#define DDR_TRC_PS 49500 -#define DDR_TFAW_PS 30000 -#define DDR_TWR_PS 15000 -#define DDR_TRFC_PS 260000 -#define 
DDR_TRRD_PS 6000 -#define DDR_TWTR_PS 7500 -#define DDR_TRTP_PS 7500 -#define DDR_REF_RATE_PS 7800000 - -#define DDR_CS0_BNDS_VAL 0x000000FF -#define DDR_CS1_BNDS_VAL 0x010001FF -#define DDR_CS2_BNDS_VAL 0x0300033F -#define DDR_CS3_BNDS_VAL 0x0340037F -#define DDR_CS0_CONFIG_VAL 0x80044402 -#define DDR_CS1_CONFIG_VAL 0x80044402 -#define DDR_CS2_CONFIG_VAL 0x00000202 -#define DDR_CS3_CONFIG_VAL 0x00040202 -#define DDR_CS_CONFIG_2_VAL 0x00000000 - -#define DDR_TIMING_CFG_0_VAL 0xFF530004 -#define DDR_TIMING_CFG_1_VAL 0x98906345 -#define DDR_TIMING_CFG_2_VAL 0x0040A114 -#define DDR_TIMING_CFG_3_VAL 0x010A1100 -#define DDR_TIMING_CFG_4_VAL 0x00000001 -#define DDR_TIMING_CFG_5_VAL 0x04402400 - -#define DDR_SDRAM_MODE_VAL 0x00441C70 -#define DDR_SDRAM_MODE_2_VAL 0x00980000 -#define DDR_SDRAM_MODE_3_8_VAL 0x00000000 -#define DDR_SDRAM_MD_CNTL_VAL 0x00000000 - -#define DDR_SDRAM_CFG_VAL 0xE7040000 -#define DDR_SDRAM_CFG_2_VAL 0x00401010 - -#define DDR_SDRAM_INTERVAL_VAL 0x0C300100 -#define DDR_DATA_INIT_VAL 0xDEADBEEF -#define DDR_SDRAM_CLK_CNTL_VAL 0x02400000 -#define DDR_ZQ_CNTL_VAL 0x89080600 - -#define DDR_WRLVL_CNTL_VAL 0x8675F604 -#define DDR_WRLVL_CNTL_2_VAL 0x05060607 -#define DDR_WRLVL_CNTL_3_VAL 0x080A0A0B - -#define DDR_SDRAM_RCW_1_VAL 0x00000000 -#define DDR_SDRAM_RCW_2_VAL 0x00000000 - -#define DDR_DDRCDR_1_VAL 0x80040000 -#define DDR_DDRCDR_2_VAL 0x00000001 - -#define DDR_ERR_INT_EN_VAL 0x0000001D -#define DDR_ERR_SBE_VAL 0x00010000 - - -/* 12.4 DDR Memory Map */ -#define DDR_BASE (CCSRBAR + 0x8000) - -#define DDR_CS_BNDS(n) *((volatile uint32_t*)(DDR_BASE + 0x000 + (n * 8))) /* Chip select n memory bounds */ -#define DDR_CS_CONFIG(n) *((volatile uint32_t*)(DDR_BASE + 0x080 + (n * 4))) /* Chip select n configuration */ -#define DDR_CS_CONFIG_2(n) *((volatile uint32_t*)(DDR_BASE + 0x0C0 + (n * 4))) /* Chip select n configuration 2 */ -#define DDR_SDRAM_CFG *((volatile uint32_t*)(DDR_BASE + 0x110)) /* DDR SDRAM control configuration */ -#define DDR_SDRAM_CFG_2 
*((volatile uint32_t*)(DDR_BASE + 0x114)) /* DDR SDRAM control configuration 2 */ -#define DDR_SDRAM_INTERVAL *((volatile uint32_t*)(DDR_BASE + 0x124)) /* DDR SDRAM interval configuration */ -#define DDR_INIT_ADDR *((volatile uint32_t*)(DDR_BASE + 0x148)) /* DDR training initialization address */ -#define DDR_INIT_EXT_ADDR *((volatile uint32_t*)(DDR_BASE + 0x14C)) /* DDR training initialization extended address */ -#define DDR_DATA_INIT *((volatile uint32_t*)(DDR_BASE + 0x128)) /* DDR training initialization value */ -#define DDR_TIMING_CFG_0 *((volatile uint32_t*)(DDR_BASE + 0x104)) /* DDR SDRAM timing configuration 0 */ -#define DDR_TIMING_CFG_1 *((volatile uint32_t*)(DDR_BASE + 0x108)) /* DDR SDRAM timing configuration 1 */ -#define DDR_TIMING_CFG_2 *((volatile uint32_t*)(DDR_BASE + 0x10C)) /* DDR SDRAM timing configuration 2 */ -#define DDR_TIMING_CFG_3 *((volatile uint32_t*)(DDR_BASE + 0x100)) /* DDR SDRAM timing configuration 3 */ -#define DDR_TIMING_CFG_4 *((volatile uint32_t*)(DDR_BASE + 0x160)) /* DDR SDRAM timing configuration 4 */ -#define DDR_TIMING_CFG_5 *((volatile uint32_t*)(DDR_BASE + 0x164)) /* DDR SDRAM timing configuration 5 */ -#define DDR_TIMING_CFG_6 *((volatile uint32_t*)(DDR_BASE + 0x168)) /* DDR SDRAM timing configuration 6 */ -#define DDR_ZQ_CNTL *((volatile uint32_t*)(DDR_BASE + 0x170)) /* DDR ZQ calibration control */ -#define DDR_WRLVL_CNTL *((volatile uint32_t*)(DDR_BASE + 0x174)) /* DDR write leveling control */ -#define DDR_WRLVL_CNTL_2 *((volatile uint32_t*)(DDR_BASE + 0x190)) /* DDR write leveling control 2 */ -#define DDR_WRLVL_CNTL_3 *((volatile uint32_t*)(DDR_BASE + 0x194)) /* DDR write leveling control 3 */ -#define DDR_SR_CNTR *((volatile uint32_t*)(DDR_BASE + 0x17C)) /* DDR Self Refresh Counter */ -#define DDR_SDRAM_RCW_1 *((volatile uint32_t*)(DDR_BASE + 0x180)) /* DDR Register Control Word 1 */ -#define DDR_SDRAM_RCW_2 *((volatile uint32_t*)(DDR_BASE + 0x184)) /* DDR Register Control Word 2 */ -#define DDR_DDRCDR_1 
*((volatile uint32_t*)(DDR_BASE + 0xB28)) /* DDR Control Driver Register 1 */ -#define DDR_DDRCDR_2 *((volatile uint32_t*)(DDR_BASE + 0xB2C)) /* DDR Control Driver Register 2 */ -#define DDR_DDRDSR_1 *((volatile uint32_t*)(DDR_BASE + 0xB20)) /* DDR Debug Status Register 1 */ -#define DDR_DDRDSR_2 *((volatile uint32_t*)(DDR_BASE + 0xB24)) /* DDR Debug Status Register 2 */ -#define DDR_ERR_DISABLE *((volatile uint32_t*)(DDR_BASE + 0xE44)) /* Memory error disable */ -#define DDR_ERR_INT_EN *((volatile uint32_t*)(DDR_BASE + 0xE48)) /* Memory error interrupt enable */ -#define DDR_ERR_SBE *((volatile uint32_t*)(DDR_BASE + 0xE58)) /* Single-Bit ECC memory error management */ -#define DDR_SDRAM_MODE *((volatile uint32_t*)(DDR_BASE + 0x118)) /* DDR SDRAM mode configuration */ -#define DDR_SDRAM_MODE_2 *((volatile uint32_t*)(DDR_BASE + 0x11C)) /* DDR SDRAM mode configuration 2 */ -#define DDR_SDRAM_MODE_3 *((volatile uint32_t*)(DDR_BASE + 0x200)) /* DDR SDRAM mode configuration 3 */ -#define DDR_SDRAM_MODE_4 *((volatile uint32_t*)(DDR_BASE + 0x204)) /* DDR SDRAM mode configuration 4 */ -#define DDR_SDRAM_MODE_5 *((volatile uint32_t*)(DDR_BASE + 0x208)) /* DDR SDRAM mode configuration 5 */ -#define DDR_SDRAM_MODE_6 *((volatile uint32_t*)(DDR_BASE + 0x20C)) /* DDR SDRAM mode configuration 6 */ -#define DDR_SDRAM_MODE_7 *((volatile uint32_t*)(DDR_BASE + 0x210)) /* DDR SDRAM mode configuration 7 */ -#define DDR_SDRAM_MODE_8 *((volatile uint32_t*)(DDR_BASE + 0x214)) /* DDR SDRAM mode configuration 8 */ -#define DDR_SDRAM_MD_CNTL *((volatile uint32_t*)(DDR_BASE + 0x120)) /* DDR SDRAM mode control */ -#define DDR_SDRAM_CLK_CNTL *((volatile uint32_t*)(DDR_BASE + 0x130)) /* DDR SDRAM clock control */ - -#define DDR_SDRAM_CFG_MEM_EN 0x80000000 /* SDRAM interface logic is enabled */ -#define DDR_SDRAM_CFG_2_D_INIT 0x00000010 /* data initialization in progress */ - - -/* generic share NXP QorIQ driver code */ +/* generic shared NXP QorIQ driver code */ #include "nxp_ppc.c" @@ -307,20 
+39,20 @@ void uart_init(void) */ uint32_t div = (((SYS_CLK / 2.0) / (16 * BAUD_RATE)) + 0.5); - while (!(UART_LSR(UART_SEL) & UART_LSR_TEMT)) + while (!(get8(UART_LSR(UART_SEL)) & UART_LSR_TEMT)) ; /* set ier, fcr, mcr */ - UART_IER(UART_SEL) = 0; - UART_FCR(UART_SEL) = (UART_FCR_TFR | UART_FCR_RFR | UART_FCR_FEN); + set8(UART_IER(UART_SEL), 0); + set8(UART_FCR(UART_SEL), (UART_FCR_TFR | UART_FCR_RFR | UART_FCR_FEN)); /* enable baud rate access (DLAB=1) - divisor latch access bit*/ - UART_LCR(UART_SEL) = (UART_LCR_DLAB | UART_LCR_WLS); + set8(UART_LCR(UART_SEL), (UART_LCR_DLAB | UART_LCR_WLS)); /* set divisor */ - UART_DLB(UART_SEL) = (div & 0xff); - UART_DMB(UART_SEL) = ((div>>8) & 0xff); + set8(UART_DLB(UART_SEL), (div & 0xff)); + set8(UART_DMB(UART_SEL), ((div>>8) & 0xff)); /* disable rate access (DLAB=0) */ - UART_LCR(UART_SEL) = (UART_LCR_WLS); + set8(UART_LCR(UART_SEL), (UART_LCR_WLS)); } void uart_write(const char* buf, uint32_t sz) @@ -329,11 +61,11 @@ void uart_write(const char* buf, uint32_t sz) while (sz-- > 0) { char c = buf[pos++]; if (c == '\n') { /* handle CRLF */ - while ((UART_LSR(UART_SEL) & UART_LSR_THRE) == 0); - UART_THR(UART_SEL) = '\r'; + while ((get8(UART_LSR(UART_SEL)) & UART_LSR_THRE) == 0); + set8(UART_THR(UART_SEL), '\r'); } - while ((UART_LSR(UART_SEL) & UART_LSR_THRE) == 0); - UART_THR(UART_SEL) = c; + while ((get8(UART_LSR(UART_SEL)) & UART_LSR_THRE) == 0); + set8(UART_THR(UART_SEL), c); } } #endif /* DEBUG_UART */ @@ -344,6 +76,13 @@ void law_init(void) set_law(3, 0xF, 0xF4000000, LAW_TRGT_BMAN, LAW_SIZE_32MB, 1); } +/* Delay helper using timebase */ +#define DELAY_US (SYS_CLK / 1000000) +static void udelay(uint32_t delay_us) +{ + wait_ticks(delay_us * DELAY_US); +} + static void hal_flash_init(void) { /* IFC - NOR Flash */ @@ -351,107 +90,138 @@ static void hal_flash_init(void) set_law(1, FLASH_BASE_PHYS_HIGH, FLASH_BASE, LAW_TRGT_IFC, LAW_SIZE_128MB, 1); /* NOR IFC Flash Timing Parameters */ - IFC_FTIM0(0) = 
(IFC_FTIM0_NOR_TACSE(4) | \ - IFC_FTIM0_NOR_TEADC(5) | \ - IFC_FTIM0_NOR_TEAHC(5)); - IFC_FTIM1(0) = (IFC_FTIM1_NOR_TACO(53) | - IFC_FTIM1_NOR_TRAD(26) | - IFC_FTIM1_NOR_TSEQ(19)); - IFC_FTIM2(0) = (IFC_FTIM2_NOR_TCS(4) | - IFC_FTIM2_NOR_TCH(4) | - IFC_FTIM2_NOR_TWPH(14) | - IFC_FTIM2_NOR_TWP(28)); - IFC_FTIM3(0) = 0; + set32(IFC_FTIM0(0), (IFC_FTIM0_NOR_TACSE(4) | + IFC_FTIM0_NOR_TEADC(5) | + IFC_FTIM0_NOR_TEAHC(5))); + set32(IFC_FTIM1(0), (IFC_FTIM1_NOR_TACO(53) | + IFC_FTIM1_NOR_TRAD(26) | + IFC_FTIM1_NOR_TSEQ(19))); + set32(IFC_FTIM2(0), (IFC_FTIM2_NOR_TCS(4) | + IFC_FTIM2_NOR_TCH(4) | + IFC_FTIM2_NOR_TWPH(14) | + IFC_FTIM2_NOR_TWP(28))); + set32(IFC_FTIM3(0), 0); /* NOR IFC Definitions (CS0) */ - IFC_CSPR_EXT(0) = (0xF); - IFC_CSPR(0) = (IFC_CSPR_PHYS_ADDR(FLASH_BASE) | \ - IFC_CSPR_PORT_SIZE_16 | \ - IFC_CSPR_MSEL_NOR | \ - IFC_CSPR_V); - IFC_AMASK(0) = IFC_AMASK_128MB; - IFC_CSOR(0) = 0x0000000C; /* TRHZ (80 clocks for read enable high) */ + set32(IFC_CSPR_EXT(0), 0xF); + set32(IFC_CSPR(0), (IFC_CSPR_PHYS_ADDR(FLASH_BASE) | + IFC_CSPR_PORT_SIZE_16 | + IFC_CSPR_MSEL_NOR | + IFC_CSPR_V)); + set32(IFC_AMASK(0), IFC_AMASK_128MB); + set32(IFC_CSOR(0), 0x0000000C); /* TRHZ (80 clocks for read enable high) */ } static void hal_ddr_init(void) { #ifdef ENABLE_DDR + uint32_t reg; + /* Map LAW for DDR */ - set_law(4, 0, 0, LAW_TRGT_DDR_1, LAW_SIZE_2GB, 0); + set_law(4, 0, DDR_ADDRESS, LAW_TRGT_DDR_1, LAW_SIZE_2GB, 0); /* If DDR is already enabled then just return */ - if (DDR_SDRAM_CFG & DDR_SDRAM_CFG_MEM_EN) { + if (get32(DDR_SDRAM_CFG) & DDR_SDRAM_CFG_MEM_EN) { return; } + /* Set clock early for clock / pin */ + set32(DDR_SDRAM_CLK_CNTL, DDR_SDRAM_CLK_CNTL_VAL); + /* Setup DDR CS (chip select) bounds */ - DDR_CS_BNDS(0) = DDR_CS0_BNDS_VAL; - DDR_CS_CONFIG(0) = DDR_CS0_CONFIG_VAL; - DDR_CS_CONFIG_2(0) = DDR_CS_CONFIG_2_VAL; - DDR_CS_BNDS(1) = DDR_CS1_BNDS_VAL; - DDR_CS_CONFIG(1) = DDR_CS1_CONFIG_VAL; - DDR_CS_CONFIG_2(1) = DDR_CS_CONFIG_2_VAL; - DDR_CS_BNDS(2) = 
DDR_CS2_BNDS_VAL; - DDR_CS_CONFIG(2) = DDR_CS2_CONFIG_VAL; - DDR_CS_CONFIG_2(2) = DDR_CS_CONFIG_2_VAL; - DDR_CS_BNDS(3) = DDR_CS3_BNDS_VAL; - DDR_CS_CONFIG(3) = DDR_CS3_CONFIG_VAL; - DDR_CS_CONFIG_2(3) = DDR_CS_CONFIG_2_VAL; + set32(DDR_CS_BNDS(0), DDR_CS0_BNDS_VAL); + set32(DDR_CS_CONFIG(0), DDR_CS0_CONFIG_VAL); + set32(DDR_CS_CONFIG_2(0), DDR_CS_CONFIG_2_VAL); + set32(DDR_CS_BNDS(1), DDR_CS1_BNDS_VAL); + set32(DDR_CS_CONFIG(1), DDR_CS1_CONFIG_VAL); + set32(DDR_CS_CONFIG_2(1), DDR_CS_CONFIG_2_VAL); + set32(DDR_CS_BNDS(2), DDR_CS2_BNDS_VAL); + set32(DDR_CS_CONFIG(2), DDR_CS2_CONFIG_VAL); + set32(DDR_CS_CONFIG_2(2), DDR_CS_CONFIG_2_VAL); + set32(DDR_CS_BNDS(3), DDR_CS3_BNDS_VAL); + set32(DDR_CS_CONFIG(3), DDR_CS3_CONFIG_VAL); + set32(DDR_CS_CONFIG_2(3), DDR_CS_CONFIG_2_VAL); /* DDR SDRAM timing configuration */ - DDR_TIMING_CFG_0 = DDR_TIMING_CFG_0_VAL; - DDR_TIMING_CFG_1 = DDR_TIMING_CFG_1_VAL; - DDR_TIMING_CFG_2 = DDR_TIMING_CFG_2_VAL; - DDR_TIMING_CFG_3 = DDR_TIMING_CFG_3_VAL; - DDR_TIMING_CFG_4 = DDR_TIMING_CFG_4_VAL; - DDR_TIMING_CFG_5 = DDR_TIMING_CFG_5_VAL; + set32(DDR_TIMING_CFG_3, DDR_TIMING_CFG_3_VAL); + set32(DDR_TIMING_CFG_0, DDR_TIMING_CFG_0_VAL); + set32(DDR_TIMING_CFG_1, DDR_TIMING_CFG_1_VAL); + set32(DDR_TIMING_CFG_2, DDR_TIMING_CFG_2_VAL); + set32(DDR_TIMING_CFG_4, DDR_TIMING_CFG_4_VAL); + set32(DDR_TIMING_CFG_5, DDR_TIMING_CFG_5_VAL); + + set32(DDR_ZQ_CNTL, DDR_ZQ_CNTL_VAL); /* DDR SDRAM mode configuration */ - DDR_SDRAM_MODE = DDR_SDRAM_MODE_VAL; - DDR_SDRAM_MODE_2 = DDR_SDRAM_MODE_2_VAL; - DDR_SDRAM_MODE_3 = DDR_SDRAM_MODE_3_8_VAL; - DDR_SDRAM_MODE_4 = DDR_SDRAM_MODE_3_8_VAL; - DDR_SDRAM_MODE_5 = DDR_SDRAM_MODE_3_8_VAL; - DDR_SDRAM_MODE_6 = DDR_SDRAM_MODE_3_8_VAL; - DDR_SDRAM_MODE_7 = DDR_SDRAM_MODE_3_8_VAL; - DDR_SDRAM_MODE_8 = DDR_SDRAM_MODE_3_8_VAL; - DDR_SDRAM_MD_CNTL = DDR_SDRAM_MD_CNTL_VAL; + set32(DDR_SDRAM_MODE, DDR_SDRAM_MODE_VAL); + set32(DDR_SDRAM_MODE_2, DDR_SDRAM_MODE_2_VAL); + set32(DDR_SDRAM_MODE_3, DDR_SDRAM_MODE_3_8_VAL); + 
set32(DDR_SDRAM_MODE_4, DDR_SDRAM_MODE_3_8_VAL); + set32(DDR_SDRAM_MODE_5, DDR_SDRAM_MODE_3_8_VAL); + set32(DDR_SDRAM_MODE_6, DDR_SDRAM_MODE_3_8_VAL); + set32(DDR_SDRAM_MODE_7, DDR_SDRAM_MODE_3_8_VAL); + set32(DDR_SDRAM_MODE_8, DDR_SDRAM_MODE_3_8_VAL); + set32(DDR_SDRAM_MD_CNTL, DDR_SDRAM_MD_CNTL_VAL); /* DDR Configuration */ - DDR_SDRAM_INTERVAL = DDR_SDRAM_INTERVAL_VAL; - DDR_SDRAM_CLK_CNTL = DDR_SDRAM_CLK_CNTL_VAL; - DDR_DATA_INIT = DDR_DATA_INIT_VAL; - DDR_ZQ_CNTL = DDR_ZQ_CNTL_VAL; - DDR_WRLVL_CNTL = DDR_WRLVL_CNTL_VAL; - DDR_WRLVL_CNTL_2 = DDR_WRLVL_CNTL_2_VAL; - DDR_WRLVL_CNTL_3 = DDR_WRLVL_CNTL_3_VAL; - DDR_SR_CNTR = 0; - DDR_SDRAM_RCW_1 = 0; - DDR_SDRAM_RCW_2 = 0; - DDR_DDRCDR_1 = DDR_DDRCDR_1_VAL; - DDR_DDRCDR_2 = DDR_DDRCDR_2_VAL; - DDR_SDRAM_CFG_2 = DDR_SDRAM_CFG_2_VAL; - DDR_INIT_ADDR = 0; - DDR_INIT_EXT_ADDR = 0; - DDR_ERR_DISABLE = 0; - DDR_ERR_INT_EN = DDR_ERR_INT_EN_VAL; - DDR_ERR_SBE = DDR_ERR_SBE_VAL; + set32(DDR_SDRAM_INTERVAL, DDR_SDRAM_INTERVAL_VAL); + set32(DDR_DATA_INIT, DDR_DATA_INIT_VAL); + set32(DDR_WRLVL_CNTL, DDR_WRLVL_CNTL_VAL); + set32(DDR_WRLVL_CNTL_2, DDR_WRLVL_CNTL_2_VAL); + set32(DDR_WRLVL_CNTL_3, DDR_WRLVL_CNTL_3_VAL); + set32(DDR_SR_CNTR, 0); + set32(DDR_SDRAM_RCW_1, 0); + set32(DDR_SDRAM_RCW_2, 0); + set32(DDR_DDRCDR_1, DDR_DDRCDR_1_VAL); + set32(DDR_SDRAM_CFG_2, (DDR_SDRAM_CFG_2_VAL | DDR_SDRAM_CFG_2_D_INIT)); + set32(DDR_INIT_ADDR, 0); + set32(DDR_INIT_EXT_ADDR, 0); + set32(DDR_DDRCDR_2, DDR_DDRCDR_2_VAL); + set32(DDR_ERR_DISABLE, 0); + set32(DDR_ERR_INT_EN, DDR_ERR_INT_EN_VAL); + set32(DDR_ERR_SBE, DDR_ERR_SBE_VAL); /* Set values, but do not enable the DDR yet */ - DDR_SDRAM_CFG = (DDR_SDRAM_CFG_VAL & ~DDR_SDRAM_CFG_MEM_EN); + set32(DDR_SDRAM_CFG, DDR_SDRAM_CFG_VAL & ~DDR_SDRAM_CFG_MEM_EN); + __asm__ __volatile__("sync;isync"); - /* TODO: Errata A009942 */ + /* busy wait for ~500us */ + udelay(500); + __asm__ __volatile__("sync;isync"); /* Enable controller */ - DDR_SDRAM_CFG |= DDR_SDRAM_CFG_MEM_EN; + reg = 
get32(DDR_SDRAM_CFG) & ~DDR_SDRAM_CFG_BI; + set32(DDR_SDRAM_CFG, reg | DDR_SDRAM_CFG_MEM_EN); __asm__ __volatile__("sync;isync"); - /* Wait for data initialization is complete */ - while ((DDR_SDRAM_CFG_2 & DDR_SDRAM_CFG_2_D_INIT)); -#endif + /* Wait for data initialization to complete */ + while (get32(DDR_SDRAM_CFG_2) & DDR_SDRAM_CFG_2_D_INIT) { + /* busy wait loop - throttle polling */ + udelay(10000); + } +#endif /* ENABLE_DDR */ } void hal_early_init(void) { + /* Enable timebase on core 0 */ + set32(RCPM_PCTBENR, (1 << 0)); + + /* Only invalidate the CPC if it is NOT configured as SRAM. + * When CPC SRAM is active (used as stack), writing CPCFI|CPCLFC + * without preserving CPCE would disable the CPC and corrupt the + * stack. Skip invalidation when SRAMEN is set (T2080RM 8.4.2.2). */ + if (!(get32((volatile uint32_t*)(CPC_BASE + CPCSRCR0)) & CPCSRCR0_SRAMEN)) { + set32((volatile uint32_t*)(CPC_BASE + CPCCSR0), + (CPCCSR0_CPCFI | CPCCSR0_CPCLFC)); + /* Wait for self-clearing invalidate bits */ + while (get32((volatile uint32_t*)(CPC_BASE + CPCCSR0)) & + (CPCCSR0_CPCFI | CPCCSR0_CPCLFC)); + } + + /* Set DCSR space = 1G */ + set32(DCFG_DCSR, (get32(DCFG_DCSR) | CORENET_DCSR_SZ_1G)); + get32(DCFG_DCSR); /* read back to sync */ + hal_ddr_init(); } @@ -459,24 +229,24 @@ static void hal_cpld_init(void) { #ifdef ENABLE_CPLD /* CPLD IFC Timing Parameters */ - IFC_FTIM0(3) = (IFC_FTIM0_GPCM_TACSE(16UL) | - IFC_FTIM0_GPCM_TEADC(16UL) | - IFC_FTIM0_GPCM_TEAHC(16UL)); - IFC_FTIM1(3) = (IFC_FTIM1_GPCM_TACO(16UL) | - IFC_FTIM1_GPCM_TRAD(31UL)); - IFC_FTIM2(3) = (IFC_FTIM2_GPCM_TCS(16UL) | - IFC_FTIM2_GPCM_TCH(8UL) | - IFC_FTIM2_GPCM_TWP(31UL)); - IFC_FTIM3(3) = 0; + set32(IFC_FTIM0(3), (IFC_FTIM0_GPCM_TACSE(16UL) | + IFC_FTIM0_GPCM_TEADC(16UL) | + IFC_FTIM0_GPCM_TEAHC(16UL))); + set32(IFC_FTIM1(3), (IFC_FTIM1_GPCM_TACO(16UL) | + IFC_FTIM1_GPCM_TRAD(31UL))); + set32(IFC_FTIM2(3), (IFC_FTIM2_GPCM_TCS(16UL) | + IFC_FTIM2_GPCM_TCH(8UL) | + IFC_FTIM2_GPCM_TWP(31UL))); + 
set32(IFC_FTIM3(3), 0); /* CPLD IFC Definitions (CS3) */ - IFC_CSPR_EXT(3) = CPLD_BASE_PHYS_HIGH; - IFC_CSPR(3) = (IFC_CSPR_PHYS_ADDR(CPLD_BASE) | - IFC_CSPR_PORT_SIZE_16 | - IFC_CSPR_MSEL_GPCM | - IFC_CSPR_V); - IFC_AMASK(3) = IFC_AMASK_64KB; - IFC_CSOR(3) = 0; + set32(IFC_CSPR_EXT(3), CPLD_BASE_PHYS_HIGH); + set32(IFC_CSPR(3), (IFC_CSPR_PHYS_ADDR(CPLD_BASE) | + IFC_CSPR_PORT_SIZE_16 | + IFC_CSPR_MSEL_GPCM | + IFC_CSPR_V)); + set32(IFC_AMASK(3), IFC_AMASK_64KB); + set32(IFC_CSOR(3), 0); /* IFC - CPLD */ set_law(2, CPLD_BASE_PHYS_HIGH, CPLD_BASE, @@ -495,6 +265,9 @@ void hal_init(void) uint32_t fw; #endif + /* Enable timebase on core 0 */ + set32(RCPM_PCTBENR, (1 << 0)); + law_init(); #ifdef DEBUG_UART @@ -506,19 +279,14 @@ void hal_init(void) hal_cpld_init(); #ifdef ENABLE_CPLD - CPLD_DATA(CPLD_PROC_STATUS) = 1; /* Enable proc reset */ - CPLD_DATA(CPLD_WR_TEMP_ALM_OVRD) = 0; /* Enable temp alarm */ + set8(CPLD_DATA(CPLD_PROC_STATUS), 1); /* Enable proc reset */ + set8(CPLD_DATA(CPLD_WR_TEMP_ALM_OVRD), 0); /* Enable temp alarm */ #ifdef DEBUG_UART - fw = CPLD_DATA(CPLD_FW_REV); + fw = get8(CPLD_DATA(CPLD_FW_REV)); wolfBoot_printf("CPLD FW Rev: 0x%x\n", fw); #endif #endif /* ENABLE_CPLD */ - -#if 0 /* not tested */ - /* Disable SATA Write Protection */ - SATA_ENBL = 0; -#endif } int hal_flash_write(uint32_t address, const uint8_t *data, int len) diff --git a/hal/nxp_t2080.h b/hal/nxp_t2080.h new file mode 100644 index 0000000000..7f5b492b12 --- /dev/null +++ b/hal/nxp_t2080.h @@ -0,0 +1,312 @@ +/* nxp_t2080.h + * + * Copyright (C) 2025 wolfSSL Inc. + * + * This file is part of wolfBoot. + * + * wolfBoot is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. 
+ * + * wolfBoot is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + * + * Board: NAII 68PPC2 + * NXP T2080E Rev 1.1, e6500 core 2.0, PVR 8040_0120 and SVR 8538_0011 + */ + +#ifndef NXP_T2080_H +#define NXP_T2080_H + +#include "nxp_ppc.h" + +/* T2080 System Clock */ +#define SYS_CLK (600000000) /* 100MHz PLL with 6:1 = 600 MHz */ + +/* ---- UART (PC16552D Dual UART) ---- */ +#define BAUD_RATE 115200 +#define UART_SEL 0 /* select UART 0 or 1 */ + +#define UART_BASE(n) (CCSRBAR + 0x11C500 + (n * 0x1000)) + +#define UART_RBR(n) ((volatile uint8_t*)(UART_BASE(n) + 0)) /* receiver buffer register */ +#define UART_THR(n) ((volatile uint8_t*)(UART_BASE(n) + 0)) /* transmitter holding register */ +#define UART_IER(n) ((volatile uint8_t*)(UART_BASE(n) + 1)) /* interrupt enable register */ +#define UART_IIR(n) ((volatile uint8_t*)(UART_BASE(n) + 2)) /* interrupt ID register */ +#define UART_FCR(n) ((volatile uint8_t*)(UART_BASE(n) + 2)) /* FIFO control register */ +#define UART_LCR(n) ((volatile uint8_t*)(UART_BASE(n) + 3)) /* line control register */ +#define UART_MCR(n) ((volatile uint8_t*)(UART_BASE(n) + 4)) /* modem control register */ +#define UART_LSR(n) ((volatile uint8_t*)(UART_BASE(n) + 5)) /* line status register */ + +/* enabled when UART_LCR_DLAB set */ +#define UART_DLB(n) ((volatile uint8_t*)(UART_BASE(n) + 0)) /* divisor least significant byte register */ +#define UART_DMB(n) ((volatile uint8_t*)(UART_BASE(n) + 1)) /* divisor most significant byte register */ + +#define UART_FCR_TFR (0x04) /* Transmitter FIFO reset */ +#define UART_FCR_RFR (0x02) /* Receiver FIFO 
reset */ +#define UART_FCR_FEN (0x01) /* FIFO enable */ +#define UART_LCR_DLAB (0x80) /* Divisor latch access bit */ +#define UART_LCR_WLS (0x03) /* Word length select: 8-bits */ +#define UART_LSR_TEMT (0x40) /* Transmitter empty */ +#define UART_LSR_THRE (0x20) /* Transmitter holding register empty */ + + +/* ---- IFC (Integrated Flash Controller) - T2080RM 13.3 ---- */ +#define IFC_BASE (CCSRBAR + 0x00124000) +#define IFC_MAX_BANKS 8 /* n in the accessors below is the chip-select (bank) index; arguments are parenthesized so expressions expand safely */ + +#define IFC_CSPR_EXT(n) ((volatile uint32_t*)(IFC_BASE + 0x000C + ((n) * 0xC))) /* Extended Base Address */ +#define IFC_CSPR(n) ((volatile uint32_t*)(IFC_BASE + 0x0010 + ((n) * 0xC))) /* Chip-select Property */ +#define IFC_AMASK(n) ((volatile uint32_t*)(IFC_BASE + 0x00A0 + ((n) * 0xC))) /* Address Mask */ +#define IFC_CSOR(n) ((volatile uint32_t*)(IFC_BASE + 0x0130 + ((n) * 0xC))) /* Chip-select Option */ +#define IFC_CSOR_EXT(n) ((volatile uint32_t*)(IFC_BASE + 0x0134 + ((n) * 0xC))) /* Extended Chip-select Option */ +#define IFC_FTIM0(n) ((volatile uint32_t*)(IFC_BASE + 0x01C0 + ((n) * 0x30))) /* Flash Timing 0 */ +#define IFC_FTIM1(n) ((volatile uint32_t*)(IFC_BASE + 0x01C4 + ((n) * 0x30))) /* Flash Timing 1 */ +#define IFC_FTIM2(n) ((volatile uint32_t*)(IFC_BASE + 0x01C8 + ((n) * 0x30))) /* Flash Timing 2 */ +#define IFC_FTIM3(n) ((volatile uint32_t*)(IFC_BASE + 0x01CC + ((n) * 0x30))) /* Flash Timing 3 */ + +#define IFC_CSPR_PHYS_ADDR(x) (((uint32_t)(x)) & 0xFFFF0000) /* Physical base address (cast applies to the whole argument) */ +#define IFC_CSPR_PORT_SIZE_8 0x00000080 /* Port Size 8 */ +#define IFC_CSPR_PORT_SIZE_16 0x00000100 /* Port Size 16 */ +#define IFC_CSPR_WP 0x00000040 /* Write Protect */ +#define IFC_CSPR_MSEL_NOR 0x00000000 /* Mode Select - NOR */ +#define IFC_CSPR_MSEL_NAND 0x00000002 /* Mode Select - NAND */ +#define IFC_CSPR_MSEL_GPCM 0x00000004 /* Mode Select - GPCM (General-purpose chip-select machine) */ +#define IFC_CSPR_V 0x00000001 /* Bank Valid */ + +/* NOR Timings (IFC clocks) */ +#define IFC_FTIM0_NOR_TACSE(n) (((n) & 0x0F) << 28) /* After address hold cycle */ +#define IFC_FTIM0_NOR_TEADC(n) (((n) & 0x3F) << 16) /* External latch address delay cycles */ +#define IFC_FTIM0_NOR_TAVDS(n) (((n) & 0x3F) << 8) /*
Delay between CS assertion */ +#define IFC_FTIM0_NOR_TEAHC(n) (((n) & 0x3F) << 0) /* External latch address hold cycles */ +#define IFC_FTIM1_NOR_TACO(n) (((n) & 0xFF) << 24) /* CS assertion to output enable */ +#define IFC_FTIM1_NOR_TRAD(n) (((n) & 0x3F) << 8) /* read access delay */ +#define IFC_FTIM1_NOR_TSEQ(n) (((n) & 0x3F) << 0) /* sequential read access delay */ +#define IFC_FTIM2_NOR_TCS(n) (((n) & 0x0F) << 24) /* Chip-select assertion setup time */ +#define IFC_FTIM2_NOR_TCH(n) (((n) & 0x0F) << 18) /* Chip-select hold time */ +#define IFC_FTIM2_NOR_TWPH(n) (((n) & 0x3F) << 10) /* Write enable pulse width high (WE negation time) - TODO confirm against T2080RM FTIM2 */ +#define IFC_FTIM2_NOR_TWP(n) (((n) & 0xFF) << 0) /* Write enable pulse width */ + +/* GPCM Timings (IFC clocks) */ +#define IFC_FTIM0_GPCM_TACSE(n) (((n) & 0x0F) << 28) /* After address hold cycle */ +#define IFC_FTIM0_GPCM_TEADC(n) (((n) & 0x3F) << 16) /* External latch address delay cycles */ +#define IFC_FTIM0_GPCM_TEAHC(n) (((n) & 0x3F) << 0) /* External latch address hold cycles */ +#define IFC_FTIM1_GPCM_TACO(n) (((n) & 0xFF) << 24) /* CS assertion to output enable */ +#define IFC_FTIM1_GPCM_TRAD(n) (((n) & 0x3F) << 8) /* read access delay */ +#define IFC_FTIM2_GPCM_TCS(n) (((n) & 0x0F) << 24) /* Chip-select assertion setup time */ +#define IFC_FTIM2_GPCM_TCH(n) (((n) & 0x0F) << 18) /* Chip-select hold time */ +#define IFC_FTIM2_GPCM_TWP(n) (((n) & 0xFF) << 0) /* Write enable pulse width */ + +/* IFC AMASK - RM Table 13-3 - Count of MSB minus 1 */ +enum ifc_amask_sizes { + IFC_AMASK_64KB = 0xFFFF0000, + IFC_AMASK_128KB = 0xFFFE0000, + IFC_AMASK_256KB = 0xFFFC0000, + IFC_AMASK_512KB = 0xFFF80000, + IFC_AMASK_1MB = 0xFFF00000, + IFC_AMASK_2MB = 0xFFE00000, + IFC_AMASK_4MB = 0xFFC00000, + IFC_AMASK_8MB = 0xFF800000, + IFC_AMASK_16MB = 0xFF000000, + IFC_AMASK_32MB = 0xFE000000, + IFC_AMASK_64MB = 0xFC000000, + IFC_AMASK_128MB = 0xF8000000, + IFC_AMASK_256MB = 0xF0000000, + IFC_AMASK_512MB = 0xE0000000, + IFC_AMASK_1GB = 0xC0000000, + IFC_AMASK_2GB =
0x80000000, + IFC_AMASK_4GB = 0x00000000, +}; + + +/* ---- NOR Flash ---- */ +#define FLASH_BASE 0xE8000000 + +#define FLASH_BANK_SIZE (128*1024*1024) +#define FLASH_PAGE_SIZE (1024) /* program buffer */ +#define FLASH_SECTOR_SIZE (128*1024) +#define FLASH_SECTORS (FLASH_BANK_SIZE / FLASH_SECTOR_SIZE) +#define FLASH_CFI_16BIT 0x02 /* word */ +#define FLASH_CFI_WIDTH FLASH_CFI_16BIT + +#define FLASH_ERASE_TOUT 60000 /* Flash Erase Timeout (ms) */ +#define FLASH_WRITE_TOUT 500 /* Flash Write Timeout (ms) */ + + +/* ---- CPLD ---- */ +#if 0 + #define ENABLE_CPLD +#endif +#define CPLD_BASE 0xFFDF0000 +#define CPLD_BASE_PHYS_HIGH 0xFULL + +#define CPLD_SPARE 0x00 +#define CPLD_SATA_MUX_SEL 0x02 +#define CPLD_BANK_SEL 0x04 +#define CPLD_FW_REV 0x06 +#define CPLD_TTL_RW 0x08 +#define CPLD_TTL_LPBK 0x0A +#define CPLD_TTL_DATA 0x0C +#define CPLD_PROC_STATUS 0x0E /* write 1 to enable proc reset function, reset default value is 0 */ +#define CPLD_FPGA_RDY 0x10 /* read only when reg read 0x0DB1 then fpga is ready */ +#define CPLD_PCIE_SW_RESET 0x12 /* write 1 to reset the PCIe switch */ +#define CPLD_WR_TTL_INT_EN 0x14 +#define CPLD_WR_TTL_INT_DIR 0x16 +#define CPLD_INT_STAT 0x18 +#define CPLD_WR_TEMP_ALM_OVRD 0x1A /* write 0 to enable temp shutdown. 
reset default value is 1 */ +#define CPLD_PWR_DWN_CMD 0x1C +#define CPLD_TEMP_ALM_INT_STAT 0x1E +#define CPLD_WR_TEMP_ALM_INT_EN 0x20 + +#define CPLD_FLASH_BANK_0 0x00 +#define CPLD_FLASH_BANK_1 0x01 + +#define CPLD_DATA(n) ((volatile uint8_t*)(CPLD_BASE + n)) + + +/* ---- SATA ---- */ +#define SATA_ENBL ((volatile uint32_t*)0xB1003F4C) /* also saw 0xB4003F4C */ + + +/* ---- DCFG (Device Configuration) - T2080RM 6.3 ---- */ +#define DCFG_BASE (CCSRBAR + 0xE0000) +#define DCFG_DCSR ((volatile uint32_t*)(DCFG_BASE + 0x704)) /* Debug Configuration and Status */ +#define DCFG_DEVDISR1 ((volatile uint32_t*)(DCFG_BASE + 0x070)) /* Device Disable Control 1 */ +#define DCFG_DEVDISR2 ((volatile uint32_t*)(DCFG_BASE + 0x074)) /* Device Disable Control 2 */ +#define DCFG_DEVDISR3 ((volatile uint32_t*)(DCFG_BASE + 0x078)) /* Device Disable Control 3 */ +#define DCFG_DEVDISR4 ((volatile uint32_t*)(DCFG_BASE + 0x07C)) /* Device Disable Control 4 */ +#define DCFG_DEVDISR5 ((volatile uint32_t*)(DCFG_BASE + 0x080)) /* Device Disable Control 5 */ + +/* ---- RCPM (Run Control and Power Management) - T2080RM 6.4 ---- */ +#define RCPM_BASE (CCSRBAR + 0xE2000) +#define RCPM_PCTBENR ((volatile uint32_t*)(RCPM_BASE + 0x1A0)) /* Physical Core Timebase Enable */ + + +/* ---- DDR (T2080RM 12.4) ---- */ +/* NAII 68PPC2 - 8GB discrete DDR3 IM8G08D3EBDG-15E */ +/* 1333.333 MT/s data rate 8 GiB (DDR3, 64-bit, CL=9, ECC on) */ +#define DDR_N_RANKS 2 +#define DDR_RANK_DENS 0x100000000 +#define DDR_SDRAM_WIDTH 64 +#define DDR_EC_SDRAM_W 8 +#define DDR_N_ROW_ADDR 16 +#define DDR_N_COL_ADDR 10 +#define DDR_N_BANKS 8 +#define DDR_EDC_CONFIG 2 +#define DDR_BURSTL_MASK 0x0c +#define DDR_TCKMIN_X_PS 1500 +#define DDR_TCMMAX_PS 3000 +#define DDR_CASLAT_X 0x000007E0 +#define DDR_TAA_PS 13500 +#define DDR_TRCD_PS 13500 +#define DDR_TRP_PS 13500 +#define DDR_TRAS_PS 36000 +#define DDR_TRC_PS 49500 +#define DDR_TFAW_PS 30000 +#define DDR_TWR_PS 15000 +#define DDR_TRFC_PS 260000 +#define DDR_TRRD_PS 6000 
+#define DDR_TWTR_PS 7500 +#define DDR_TRTP_PS 7500 +#define DDR_REF_RATE_PS 7800000 + +#define DDR_CS0_BNDS_VAL 0x000000FF +#define DDR_CS1_BNDS_VAL 0x010001FF +#define DDR_CS2_BNDS_VAL 0x0300033F +#define DDR_CS3_BNDS_VAL 0x0340037F +#define DDR_CS0_CONFIG_VAL 0x80044402 +#define DDR_CS1_CONFIG_VAL 0x80044402 +#define DDR_CS2_CONFIG_VAL 0x00000202 +#define DDR_CS3_CONFIG_VAL 0x00040202 +#define DDR_CS_CONFIG_2_VAL 0x00000000 + +#define DDR_TIMING_CFG_0_VAL 0xFF550004 +#define DDR_TIMING_CFG_1_VAL 0xBCB48C56 +#define DDR_TIMING_CFG_2_VAL 0x0040C114 +#define DDR_TIMING_CFG_3_VAL 0x010C1000 +#define DDR_TIMING_CFG_4_VAL 0x00000001 +#define DDR_TIMING_CFG_5_VAL 0x03402400 + +#define DDR_SDRAM_MODE_VAL 0x00441C70 +#define DDR_SDRAM_MODE_2_VAL 0x00980000 +#define DDR_SDRAM_MODE_3_8_VAL 0x00000000 +#define DDR_SDRAM_MD_CNTL_VAL 0x00000000 + +#define DDR_SDRAM_CFG_VAL 0xE7044000 +#define DDR_SDRAM_CFG_2_VAL 0x00401050 + +#define DDR_SDRAM_INTERVAL_VAL 0x0C300100 +#define DDR_DATA_INIT_VAL 0xDEADBEEF +#define DDR_SDRAM_CLK_CNTL_VAL 0x02400000 +#define DDR_ZQ_CNTL_VAL 0x89080600 + +#define DDR_WRLVL_CNTL_VAL 0x8675F608 +#define DDR_WRLVL_CNTL_2_VAL 0x080A0A0C +#define DDR_WRLVL_CNTL_3_VAL 0x0C0E0E0D + +#define DDR_SDRAM_RCW_1_VAL 0x00000000 +#define DDR_SDRAM_RCW_2_VAL 0x00000000 + +#define DDR_DDRCDR_1_VAL 0x80040000 +#define DDR_DDRCDR_2_VAL 0x00000001 + +#define DDR_ERR_INT_EN_VAL 0x0000001D +#define DDR_ERR_SBE_VAL 0x00010000 + + +/* 12.4 DDR Memory Map */ +#define DDR_BASE (CCSRBAR + 0x8000) + +#define DDR_CS_BNDS(n) ((volatile uint32_t*)(DDR_BASE + 0x000 + (n * 8))) /* Chip select n memory bounds */ +#define DDR_CS_CONFIG(n) ((volatile uint32_t*)(DDR_BASE + 0x080 + (n * 4))) /* Chip select n configuration */ +#define DDR_CS_CONFIG_2(n) ((volatile uint32_t*)(DDR_BASE + 0x0C0 + (n * 4))) /* Chip select n configuration 2 */ +#define DDR_TIMING_CFG_3 ((volatile uint32_t*)(DDR_BASE + 0x100)) /* DDR SDRAM timing configuration 3 */ +#define DDR_TIMING_CFG_0 ((volatile 
uint32_t*)(DDR_BASE + 0x104)) /* DDR SDRAM timing configuration 0 */ +#define DDR_TIMING_CFG_1 ((volatile uint32_t*)(DDR_BASE + 0x108)) /* DDR SDRAM timing configuration 1 */ +#define DDR_TIMING_CFG_2 ((volatile uint32_t*)(DDR_BASE + 0x10C)) /* DDR SDRAM timing configuration 2 */ +#define DDR_SDRAM_CFG ((volatile uint32_t*)(DDR_BASE + 0x110)) /* DDR SDRAM control configuration */ +#define DDR_SDRAM_CFG_2 ((volatile uint32_t*)(DDR_BASE + 0x114)) /* DDR SDRAM control configuration 2 */ +#define DDR_SDRAM_MODE ((volatile uint32_t*)(DDR_BASE + 0x118)) /* DDR SDRAM mode configuration */ +#define DDR_SDRAM_MODE_2 ((volatile uint32_t*)(DDR_BASE + 0x11C)) /* DDR SDRAM mode configuration 2 */ +#define DDR_SDRAM_MD_CNTL ((volatile uint32_t*)(DDR_BASE + 0x120)) /* DDR SDRAM mode control */ +#define DDR_SDRAM_INTERVAL ((volatile uint32_t*)(DDR_BASE + 0x124)) /* DDR SDRAM interval configuration */ +#define DDR_DATA_INIT ((volatile uint32_t*)(DDR_BASE + 0x128)) /* DDR training initialization value */ +#define DDR_SDRAM_CLK_CNTL ((volatile uint32_t*)(DDR_BASE + 0x130)) /* DDR SDRAM clock control */ +#define DDR_INIT_ADDR ((volatile uint32_t*)(DDR_BASE + 0x148)) /* DDR training initialization address */ +#define DDR_INIT_EXT_ADDR ((volatile uint32_t*)(DDR_BASE + 0x14C)) /* DDR training initialization extended address */ +#define DDR_TIMING_CFG_4 ((volatile uint32_t*)(DDR_BASE + 0x160)) /* DDR SDRAM timing configuration 4 */ +#define DDR_TIMING_CFG_5 ((volatile uint32_t*)(DDR_BASE + 0x164)) /* DDR SDRAM timing configuration 5 */ +#define DDR_TIMING_CFG_6 ((volatile uint32_t*)(DDR_BASE + 0x168)) /* DDR SDRAM timing configuration 6 */ +#define DDR_ZQ_CNTL ((volatile uint32_t*)(DDR_BASE + 0x170)) /* DDR ZQ calibration control */ +#define DDR_WRLVL_CNTL ((volatile uint32_t*)(DDR_BASE + 0x174)) /* DDR write leveling control */ +#define DDR_SR_CNTR ((volatile uint32_t*)(DDR_BASE + 0x17C)) /* DDR Self Refresh Counter */ +#define DDR_SDRAM_RCW_1 ((volatile uint32_t*)(DDR_BASE + 0x180)) /* 
DDR Register Control Word 1 */ +#define DDR_SDRAM_RCW_2 ((volatile uint32_t*)(DDR_BASE + 0x184)) /* DDR Register Control Word 2 */ +#define DDR_WRLVL_CNTL_2 ((volatile uint32_t*)(DDR_BASE + 0x190)) /* DDR write leveling control 2 */ +#define DDR_WRLVL_CNTL_3 ((volatile uint32_t*)(DDR_BASE + 0x194)) /* DDR write leveling control 3 */ +#define DDR_SDRAM_MODE_3 ((volatile uint32_t*)(DDR_BASE + 0x200)) /* DDR SDRAM mode configuration 3 */ +#define DDR_SDRAM_MODE_4 ((volatile uint32_t*)(DDR_BASE + 0x204)) /* DDR SDRAM mode configuration 4 */ +#define DDR_SDRAM_MODE_5 ((volatile uint32_t*)(DDR_BASE + 0x208)) /* DDR SDRAM mode configuration 5 */ +#define DDR_SDRAM_MODE_6 ((volatile uint32_t*)(DDR_BASE + 0x20C)) /* DDR SDRAM mode configuration 6 */ +#define DDR_SDRAM_MODE_7 ((volatile uint32_t*)(DDR_BASE + 0x210)) /* DDR SDRAM mode configuration 7 */ +#define DDR_SDRAM_MODE_8 ((volatile uint32_t*)(DDR_BASE + 0x214)) /* DDR SDRAM mode configuration 8 */ +#define DDR_DDRCDR_1 ((volatile uint32_t*)(DDR_BASE + 0xB28)) /* DDR Control Driver Register 1 */ +#define DDR_DDRCDR_2 ((volatile uint32_t*)(DDR_BASE + 0xB2C)) /* DDR Control Driver Register 2 */ +#define DDR_DDRDSR_1 ((volatile uint32_t*)(DDR_BASE + 0xB20)) /* DDR Debug Status Register 1 */ +#define DDR_DDRDSR_2 ((volatile uint32_t*)(DDR_BASE + 0xB24)) /* DDR Debug Status Register 2 */ +#define DDR_ERR_DISABLE ((volatile uint32_t*)(DDR_BASE + 0xE44)) /* Memory error disable */ +#define DDR_ERR_INT_EN ((volatile uint32_t*)(DDR_BASE + 0xE48)) /* Memory error interrupt enable */ +#define DDR_ERR_SBE ((volatile uint32_t*)(DDR_BASE + 0xE58)) /* Single-Bit ECC memory error management */ + +#define DDR_SDRAM_CFG_MEM_EN 0x80000000 /* SDRAM interface logic is enabled */ +#define DDR_SDRAM_CFG_BI 0x00000001 /* Bypass initialization */ +#define DDR_SDRAM_CFG_2_D_INIT 0x00000010 /* data initialization in progress */ + +#endif /* NXP_T2080_H */ diff --git a/hal/nxp_t2080.ld b/hal/nxp_t2080.ld index 43e692cab9..65940daee8 100644 --- 
a/hal/nxp_t2080.ld +++ b/hal/nxp_t2080.ld @@ -13,8 +13,8 @@ MEMORY { FLASH (rx) : ORIGIN = @WOLFBOOT_ORIGIN@, LENGTH = @BOOTLOADER_PARTITION_SIZE@ - /* L2 as SRAM - 256KB */ - RAM (rwx) : ORIGIN = 0xF8F80000, LENGTH = 0x40000 + /* CPC as SRAM - 1MB */ + RAM (rwx) : ORIGIN = 0xF8F00000, LENGTH = 0x100000 /* DDR - 2GB */ DRAM (rwx) : ORIGIN = 0x00000000, LENGTH = 0x7FFFFFFF diff --git a/src/boot_ppc_start.S b/src/boot_ppc_start.S index b0fb90b9ab..84b46189f7 100644 --- a/src/boot_ppc_start.S +++ b/src/boot_ppc_start.S @@ -72,6 +72,21 @@ All TLBs for boot will be in TLB1 and supervisor mode (not user) #include "hal/nxp_ppc.h" +/* e6500 has 64-bit GPRs. When loading 32-bit addresses with bit 31 set + * (addresses >= 0x80000000), the lis instruction sign-extends, putting + * 0xFFFFFFFF in the upper 32 bits. This causes memory access failures. + * Use LOAD_ADDR32 macro to properly load 32-bit addresses on e6500. */ +#ifdef CORE_E6500 +#define LOAD_ADDR32(reg, addr) \ + li reg, 0; \ + oris reg, reg, (addr)@h; \ + ori reg, reg, (addr)@l +#else +#define LOAD_ADDR32(reg, addr) \ + lis reg, (addr)@h; \ + ori reg, reg, (addr)@l +#endif + /* variables from linker script */ .global _start_vector .global isr_empty @@ -194,6 +209,13 @@ startup_init: #ifndef TLB1_NEW_SIZE #define TLB1_NEW_SIZE BOOKE_PAGESZ_256K #endif +/* EPN alignment mask for TLB1_NEW_SIZE page. 
+ * e6500: page = 2^(TSIZE+10), e500/e5500: page = 2^(2*TSIZE+10) */ +#ifdef CORE_E6500 +#define TLB1_EPN_MASK (~((1 << (TLB1_NEW_SIZE + 10)) - 1)) +#else +#define TLB1_EPN_MASK (~((1 << (2 * TLB1_NEW_SIZE + 10)) - 1)) +#endif shrink_default_tlb1: /* Shrink the current TLB1 entry */ bl find_pc @@ -219,12 +241,14 @@ find_pc: oris r3, r3, MAS1_IPROT@h mtspr MAS1, r3 - /* Find page for PC (R1) */ - lis r3, MAS2_EPN@h - ori r3, r3, MAS2_EPN@l + /* Align PC (R1) to TLB page size boundary */ + lis r3, TLB1_EPN_MASK@h + ori r3, r3, TLB1_EPN_MASK@l and r1, r1, r3 /* Set the real and virtual page for this TLB */ + lis r3, MAS2_EPN@h + ori r3, r3, MAS2_EPN@l mfspr r2, MAS2 andc r2, r2, r3 or r2, r2, r1 @@ -267,7 +291,8 @@ find_pc: setup_interrupts: /* Setup interrupt vectors */ - lis r1, (_start_vector)@h + /* e6500 GPRs are 64-bit; avoid sign-extension for high addresses */ + LOAD_ADDR32(r1, _start_vector) mtspr IVPR, r1 /* set the 48-bit high-order prefix address */ #ifdef ENABLE_INTERRUPTS @@ -412,10 +437,17 @@ invalidate_temp_tlb: boot_page: /* make sure we have the default boot page added to MMU */ /* BOOT_PAGE: TLB 1, Entry 0, Supervisor X/R/W, I, TS=0, 4KB, IPROT */ + /* Skip if Entry 0 is the currently executing TLB (R14 from + * shrink_default_tlb1). Overwriting it with a 4K page would unmap + * the code we are running from. The shrink code already set Entry 0 + * to 256K with IPROT, which is sufficient. 
*/ + cmpwi r14, 0 + beq 1f set_tlb(1, 0, BOOT_ROM_ADDR, BOOT_ROM_ADDR, 0, MAS3_SX | MAS3_SW | MAS3_SR, MAS2_I, 0, BOOKE_PAGESZ_4K, 1, r3); +1: #endif ccsr_tlb: @@ -431,8 +463,7 @@ ccsr_law: #define CCSR_LAW (LAWAR_ENABLE | \ LAWAR_TRGT_ID(LAW_TRGT_CORENET) | \ LAW_SIZE_16MB) - lis r9, CCSRBAR + LAWBAR_BASE(0)@h - ori r9, r9, CCSRBAR + LAWBAR_BASE(0)@l + LOAD_ADDR32(r9, CCSRBAR + LAWBAR_BASE(0)) lis r0, CCSRBAR_PHYS_HIGH@h ori r0, r0, CCSRBAR_PHYS_HIGH@l lis r1, CCSRBAR@h @@ -456,8 +487,7 @@ flash_law: #define FLASH_LAW (LAWAR_ENABLE | \ LAWAR_TRGT_ID(LAW_TRGT_IFC) | \ FLASH_LAW_SIZE) - lis r9, CCSRBAR + LAWBAR_BASE(1)@h - ori r9, r9, CCSRBAR + LAWBAR_BASE(1)@l + LOAD_ADDR32(r9, CCSRBAR + LAWBAR_BASE(1)) lis r0, FLASH_BASE_PHYS_HIGH@h ori r0, r0, FLASH_BASE_PHYS_HIGH@l lis r1, FLASH_BASE_ADDR@h @@ -511,25 +541,33 @@ flash_tlb: #endif /* Map initial DDR, but can be adjusted later in hal_ddr_init() */ - /* DDR - TBL=1, Entry 12/13 */ + /* DDR - TBL=1, Entry 12 (and 13 for e500) */ + #ifdef CORE_E6500 + /* e6500 supports 2GB page size - use single TLB entry */ + set_tlb(1, 12, DDR_ADDRESS, DDR_ADDRESS, 0, + MAS3_SX | MAS3_SW | MAS3_SR, DDR_WING, + 0, BOOKE_PAGESZ_2G, 1, r3); + #else + /* e500 uses two 1GB TLB entries */ set_tlb(1, 12, DDR_ADDRESS, DDR_ADDRESS, 0, MAS3_SX | MAS3_SW | MAS3_SR, DDR_WING, 0, BOOKE_PAGESZ_1G, 1, r3); - #if DDR_SIZE > 0x40000000 + #if DDR_SIZE > 0x40000000 set_tlb(1, 13, DDR_ADDRESS + 0x40000000, DDR_ADDRESS + 0x40000000, 0, MAS3_SX | MAS3_SW | MAS3_SR, DDR_WING, 0, BOOKE_PAGESZ_1G, 1, r3); + #endif #endif #endif /* ENABLE_DDR */ #ifdef INITIAL_SRAM_ADDR +#ifndef INITIAL_SRAM_NO_LAW init_sram_law: /* Intial SRAM LAW 2 */ #define INITIAL_SRAM_LAW (LAWAR_ENABLE | \ LAWAR_TRGT_ID(INITIAL_SRAM_LAW_TRGT) | \ INITIAL_SRAM_LAW_SZ) - lis r9, CCSRBAR + LAWBAR_BASE(2)@h - ori r9, r9, CCSRBAR + LAWBAR_BASE(2)@l + LOAD_ADDR32(r9, CCSRBAR + LAWBAR_BASE(2)) li r0, 0 /* UPPER=0 */ lis r1, INITIAL_SRAM_ADDR@h ori r1, r1, INITIAL_SRAM_ADDR@l @@ -542,9 
+580,12 @@ init_sram_law: /* read back LAWAR (per 2.3.2 Configuring Local Access Windows) */ lwz r2, 8(r9) isync +#endif /* !INITIAL_SRAM_NO_LAW */ init_sram_tlb: - /* Initial SRAM: TLB 1, Entry 9, Supervisor X/R/W, M, TS=0, IPROT */ + /* Initial SRAM: TLB 1, Entry 9, Supervisor X/R/W, M, TS=0, IPROT + * CPC SRAM uses cacheable memory-coherent (M) access. + * SRAM is zeroed via dcbz to avoid reading uninitialized ECC data. */ set_tlb(1, 9, INITIAL_SRAM_ADDR, INITIAL_SRAM_ADDR, 0, MAS3_SX | MAS3_SW | MAS3_SR, MAS2_M, 0, @@ -556,31 +597,31 @@ init_sram_tlb: #if defined(CORE_E5500) || defined(CORE_E6500) /* --- L2 E5500/E6500 --- */ #ifdef L2SRAM_ADDR l2_setup_sram: - /* T2080RM: 8.4.2.2 or T1024RM 13.4.2.2 - * Enabling the CPC after Power-On Reset */ + /* T2080RM: 8.4.2.2 - CPC initialization + * Restored working configuration from pre-T1024 codebase: + * Configure SRAM control registers, then enable CPC with parity. + * The LAW (DDR_1) provides CoreNet routing; CPC intercepts before DDR. + * SRAM is zeroed later via dcbz through cacheable TLB (MAS2_M). 
*/ /* R1 = CPC base */ - lis r1, CPC_BASE@h - ori r1, r1, CPC_BASE@l + LOAD_ADDR32(r1, CPC_BASE) - /* Set CPC SRAM control register */ - /* SRAM high addrress 0x0 */ + /* Configure CPC SRAM control registers */ li r0, 0 - stw r0, CPCSRCR1(r1) - /* SRAM low address */ - lis r0, L2SRAM_ADDR@h - ori r0, r0, L2SRAM_ADDR@l + stw r0, CPCSRCR1(r1) /* SRAM high address = 0 */ + /* SRAM low address - use LOAD_ADDR32 on e6500 to avoid sign extension */ + LOAD_ADDR32(r0, L2SRAM_ADDR) /* Enable SRAM and set size (must match L2SRAM_SIZE) */ - ori r0, r0, (CPCSRCR0_SRAMSZ_256 | CPCSRCR0_SRAMEN) + ori r0, r0, (CPCSRCR0_SRAMSZ_1024 | CPCSRCR0_SRAMEN) stw r0, CPCSRCR0(r1) - /* Enable memory mapped SRAM */ - lis r0, CPCCSR0_SRAM_ENABLE@h + /* Enable CPC with parity */ + lis r0, (CPCCSR0_CPCE | CPCCSR0_CPCPE)@h mbar isync stw r0, CPCCSR0(r1) mbar - /* Disable speculation */ + /* Disable speculation (Errata A-006593) */ lwz r0, CPCHDBCR0(r1) oris r0, r0, CPCHDBCR0_SPEC_DIS@h stw r0, CPCHDBCR0(r1) @@ -590,15 +631,15 @@ l2_setup_sram: l2_setup_cache: /* E6500CORERM: 11.7 L2 cache state */ /* R5 = L2 cluster 1 base */ - lis r5, L2_CLUSTER_BASE(0)@h - ori r5, r5, L2_CLUSTER_BASE(0)@l - /* Invalidate and clear locks */ - lis r1, (L2CSR0_L2FI | L2CSR0_L2LFC)@h - ori r1, r1, (L2CSR0_L2FI | L2CSR0_L2LFC)@l + LOAD_ADDR32(r5, L2_CLUSTER_BASE(0)) + + /* Flash invalidate L2 (locks already clear after reset) */ + lis r1, L2CSR0_L2FI@h + ori r1, r1, L2CSR0_L2FI@l sync stw r1, L2CSR0(r5) - /* poll till invalidate and lock bits are cleared */ + /* Poll until L2FI clears */ l2_poll_invclear: lwz r4, L2CSR0(r5) and. 
r4, r1, r4 @@ -612,7 +653,7 @@ l2_poll_invclear: /* enable L2 with parity */ sync isync - lis r4, (L2CSR0_L2E | L2CSR0_L2PE)@h + LOAD_ADDR32(r4, (L2CSR0_L2E | L2CSR0_L2PE)) stw r4, L2CSR0(r5) isync @@ -713,10 +754,9 @@ l1_tlb: #endif #endif /* ENABLE_L1_CACHE */ -#ifdef CACHE_SRAM_ADDR +#ifdef L1_CACHE_ADDR cache_sram_init: - lis r3, CACHE_SRAM_ADDR@h - ori r3, r3, CACHE_SRAM_ADDR@l + LOAD_ADDR32(r3, L1_CACHE_ADDR) /* read the cache size */ mfspr r2, L1CFG0 andi. r2, r2, 0x1FF @@ -736,13 +776,25 @@ cache_sram_init_loop: #endif addi r3, r3, CACHE_LINE_SIZE bdnz cache_sram_init_loop -#endif /* CACHE_SRAM_ADDR */ +#elif defined(L2SRAM_ADDR) +cache_sram_init: + /* Zero CPC SRAM via cache (MAS2_M = cacheable, memory coherent). + * dcbz allocates zeroed cache lines without reading from CPC, + * avoiding ECC/parity issues from uninitialized SRAM. */ + LOAD_ADDR32(r3, L2SRAM_ADDR) + li r0, 0 + LOAD_ADDR32(r2, (L2SRAM_SIZE / CACHE_LINE_SIZE)) + mtctr r2 +1: + dcbz r0, r3 + addi r3, r3, CACHE_LINE_SIZE + bdnz 1b +#endif /* L1_CACHE_ADDR */ setup_stack: /* Build top of stack address */ /* Reserve 64 bytes of initial data (must be 16 byte aligned) */ - lis r1, (_end_stack-64)@h - ori r1, r1, (_end_stack-64)@l + LOAD_ADDR32(r1, _end_stack-64) /* PowerPC e500 Application Binary Interface User's Guide * 2.3.5.1.1 Minimal Stack Frame: No Local Variables or Saved Parameters @@ -882,7 +934,7 @@ dcache_disable: lis r4, 0 ori r4, r4, L1CSR_CE andc r3, r3, r4 - mtspr L1CSR0, r0 + mtspr L1CSR0, r3 isync blr #endif @@ -996,8 +1048,29 @@ in_ram: .section .isr_vector .align 8 isr_empty: - nop - rfi + /* Minimal fault dump for early bring-up: stores SRR0, SRR1, ESR, DEAR, MCSR, PIR, MCSRR0, MCSRR1 at L2SRAM_ADDR + 0x200, then spins. The base pointer must not live in r0: addi and stw read rA=0 as the literal value 0, so an r0 base would store to 0x0..0x1C instead of SRAM. This handler never returns, so r3/r4 are free to clobber (r1, the stack pointer, is left intact). */ +#ifdef L2SRAM_ADDR + LOAD_ADDR32(r3, L2SRAM_ADDR) + addi r3, r3, 0x200 + mfspr r4, SRR0 + stw r4, 0x00(r3) + mfspr r4, SRR1 + stw r4, 0x04(r3) + mfspr r4, SPRN_ESR + stw r4, 0x08(r3) + mfspr r4, SPRN_DEAR + stw r4, 0x0C(r3) + mfspr r4, SPRN_MCSR + stw r4, 0x10(r3) + mfspr r4, SPRN_PIR + stw r4, 0x14(r3) + /* Machine check exceptions use
MCSRR0/MCSRR1 (not SRR0/SRR1) */ + mfspr r4, SPRN_MCSRR0 + stw r4, 0x18(r3) + mfspr r4, SPRN_MCSRR1 + stw r4, 0x1C(r3) +#endif +1: b 1b #endif /* reset entry point - must be at end of .S */ From b79351b2fb4fad4693112e04c0680e7892f8f4b8 Mon Sep 17 00:00:00 2001 From: David Garske Date: Fri, 6 Feb 2026 17:59:20 -0800 Subject: [PATCH 02/11] Fix some issue with L2 as SRAM --- hal/nxp_ppc.h | 16 +++++++++++----- src/boot_ppc_start.S | 10 ++++------ 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/hal/nxp_ppc.h b/hal/nxp_ppc.h index 1edd9f698e..a0d7c21b53 100644 --- a/hal/nxp_ppc.h +++ b/hal/nxp_ppc.h @@ -691,22 +691,28 @@ extern void dcache_disable(void); #else /* Assembly version */ #ifdef CORE_E6500 -/* e6500 has 64-bit MAS registers - must clear upper 32 bits */ +/* e6500 has 64-bit MAS registers - must clear upper 32 bits. + * lis sign-extends its 16-bit immediate, so a value with bit 31 set (e.g., 0xC000xxxx) would get all ones in the upper 32 bits. + * Use li 0; oris; ori pattern for all MAS registers. */ #define set_tlb(tlb, esel, epn, rpn, urpn, perms, winge, ts, tsize, iprot, reg) \ - lis reg, BOOKE_MAS0(tlb, esel, 0)@h; \ + li reg, 0; \ + oris reg, reg, BOOKE_MAS0(tlb, esel, 0)@h; \ ori reg, reg, BOOKE_MAS0(tlb, esel, 0)@l; \ mtspr MAS0, reg;\ - lis reg, BOOKE_MAS1(1, iprot, 0, ts, tsize)@h; \ + li reg, 0; \ + oris reg, reg, BOOKE_MAS1(1, iprot, 0, ts, tsize)@h; \ ori reg, reg, BOOKE_MAS1(1, iprot, 0, ts, tsize)@l; \ mtspr MAS1, reg; \ li reg, 0; \ oris reg, reg, BOOKE_MAS2(epn, winge)@h; \ ori reg, reg, BOOKE_MAS2(epn, winge)@l; \ mtspr MAS2, reg; \ - lis reg, BOOKE_MAS3(rpn, 0, perms)@h; \ + li reg, 0; \ + oris reg, reg, BOOKE_MAS3(rpn, 0, perms)@h; \ ori reg, reg, BOOKE_MAS3(rpn, 0, perms)@l; \ mtspr MAS3, reg; \ - lis reg, urpn@h; \ + li reg, 0; \ + oris reg, reg, urpn@h; \ ori reg, reg, urpn@l; \ mtspr MAS7, reg; \ isync; \ diff --git a/src/boot_ppc_start.S b/src/boot_ppc_start.S index 84b46189f7..645876e78c 100644 --- a/src/boot_ppc_start.S +++ b/src/boot_ppc_start.S @@ -569,10 +569,9 @@
init_sram_law: INITIAL_SRAM_LAW_SZ) LOAD_ADDR32(r9, CCSRBAR + LAWBAR_BASE(2)) li r0, 0 /* UPPER=0 */ - lis r1, INITIAL_SRAM_ADDR@h - ori r1, r1, INITIAL_SRAM_ADDR@l - lis r2, INITIAL_SRAM_LAW@h - ori r2, r2, INITIAL_SRAM_LAW@l + /* Use LOAD_ADDR32 on e6500 to avoid sign-extension for addresses >= 0x80000000 */ + LOAD_ADDR32(r1, INITIAL_SRAM_ADDR) + LOAD_ADDR32(r2, INITIAL_SRAM_LAW) stw r0, 0(r9) /* LAWBARH */ stw r1, 4(r9) /* LAWBARL */ sync @@ -585,7 +584,7 @@ init_sram_law: init_sram_tlb: /* Initial SRAM: TLB 1, Entry 9, Supervisor X/R/W, M, TS=0, IPROT * CPC SRAM uses cacheable memory-coherent (M) access. - * SRAM is zeroed via dcbz to avoid reading uninitialized ECC data. */ + * TLB is created BEFORE l2_setup_sram per old working code. */ set_tlb(1, 9, INITIAL_SRAM_ADDR, INITIAL_SRAM_ADDR, 0, MAS3_SX | MAS3_SW | MAS3_SR, MAS2_M, 0, @@ -598,7 +597,6 @@ init_sram_tlb: #ifdef L2SRAM_ADDR l2_setup_sram: /* T2080RM: 8.4.2.2 - CPC initialization - * Restored working configuration from pre-T1024 codebase: * Configure SRAM control registers, then enable CPC with parity. * The LAW (DDR_1) provides CoreNet routing; CPC intercepts before DDR. * SRAM is zeroed later via dcbz through cacheable TLB (MAS2_M). 
*/ From cce18250d93b8a1efafaf79bb0986b047465c366 Mon Sep 17 00:00:00 2001 From: David Garske Date: Fri, 6 Feb 2026 20:09:01 -0800 Subject: [PATCH 03/11] Fixes for T2080 flash driver --- hal/nxp_t2080.c | 201 ++++++++++++++++++++++++++++++++++++++++++------ hal/nxp_t2080.h | 5 +- 2 files changed, 179 insertions(+), 27 deletions(-) diff --git a/hal/nxp_t2080.c b/hal/nxp_t2080.c index e5a2272f78..e58555615d 100644 --- a/hal/nxp_t2080.c +++ b/hal/nxp_t2080.c @@ -28,6 +28,46 @@ /* generic shared NXP QorIQ driver code */ #include "nxp_ppc.c" +/* AMD CFI Commands (Spansion/Cypress) */ +#define AMD_CMD_RESET 0xF0 +#define AMD_CMD_WRITE 0xA0 +#define AMD_CMD_ERASE_START 0x80 +#define AMD_CMD_ERASE_SECTOR 0x30 +#define AMD_CMD_UNLOCK_START 0xAA +#define AMD_CMD_UNLOCK_ACK 0x55 +#define AMD_CMD_WRITE_TO_BUFFER 0x25 +#define AMD_CMD_WRITE_BUFFER_CONFIRM 0x29 +#define AMD_CMD_SET_PPB_ENTRY 0xC0 +#define AMD_CMD_SET_PPB_EXIT_BC1 0x90 +#define AMD_CMD_SET_PPB_EXIT_BC2 0x00 +#define AMD_CMD_PPB_UNLOCK_BC1 0x80 +#define AMD_CMD_PPB_UNLOCK_BC2 0x30 +#define AMD_CMD_PPB_LOCK_BC1 0xA0 +#define AMD_CMD_PPB_LOCK_BC2 0x00 + +#define AMD_STATUS_TOGGLE 0x40 +#define AMD_STATUS_ERROR 0x20 + +/* Flash unlock addresses */ +#if FLASH_CFI_WIDTH == 16 +#define FLASH_UNLOCK_ADDR1 0x555 +#define FLASH_UNLOCK_ADDR2 0x2AA +#else +#define FLASH_UNLOCK_ADDR1 0xAAA +#define FLASH_UNLOCK_ADDR2 0x555 +#endif + +/* Flash IO Helpers */ +#if FLASH_CFI_WIDTH == 16 +#define FLASH_IO8_WRITE(sec, n, val) *((volatile uint16_t*)(FLASH_BASE_ADDR + (FLASH_SECTOR_SIZE * (sec)) + ((n) * 2))) = (((val) << 8) | (val)) +#define FLASH_IO16_WRITE(sec, n, val) *((volatile uint16_t*)(FLASH_BASE_ADDR + (FLASH_SECTOR_SIZE * (sec)) + ((n) * 2))) = (val) +#define FLASH_IO8_READ(sec, n) (uint8_t)(*((volatile uint16_t*)(FLASH_BASE_ADDR + (FLASH_SECTOR_SIZE * (sec)) + ((n) * 2)))) +#define FLASH_IO16_READ(sec, n) *((volatile uint16_t*)(FLASH_BASE_ADDR + (FLASH_SECTOR_SIZE * (sec)) + ((n) * 2))) +#else +#define FLASH_IO8_WRITE(sec, 
n, val) *((volatile uint8_t*)(FLASH_BASE_ADDR + (FLASH_SECTOR_SIZE * (sec)) + (n))) = (val) +#define FLASH_IO8_READ(sec, n) *((volatile uint8_t*)(FLASH_BASE_ADDR + (FLASH_SECTOR_SIZE * (sec)) + (n))) +#endif + #ifdef DEBUG_UART void uart_init(void) @@ -87,7 +127,7 @@ static void hal_flash_init(void) { /* IFC - NOR Flash */ /* LAW is also set in boot_ppc_start.S:flash_law */ - set_law(1, FLASH_BASE_PHYS_HIGH, FLASH_BASE, LAW_TRGT_IFC, LAW_SIZE_128MB, 1); + set_law(1, FLASH_BASE_PHYS_HIGH, FLASH_BASE_ADDR, LAW_TRGT_IFC, LAW_SIZE_128MB, 1); /* NOR IFC Flash Timing Parameters */ set32(IFC_FTIM0(0), (IFC_FTIM0_NOR_TACSE(4) | @@ -103,7 +143,7 @@ static void hal_flash_init(void) set32(IFC_FTIM3(0), 0); /* NOR IFC Definitions (CS0) */ set32(IFC_CSPR_EXT(0), 0xF); - set32(IFC_CSPR(0), (IFC_CSPR_PHYS_ADDR(FLASH_BASE) | + set32(IFC_CSPR(0), (IFC_CSPR_PHYS_ADDR(FLASH_BASE_ADDR) | IFC_CSPR_PORT_SIZE_16 | IFC_CSPR_MSEL_NOR | IFC_CSPR_V)); @@ -289,20 +329,135 @@ void hal_init(void) #endif /* ENABLE_CPLD */ } +static void hal_flash_unlock_sector(uint32_t sector) +{ + /* AMD unlock sequence */ + FLASH_IO8_WRITE(sector, FLASH_UNLOCK_ADDR1, AMD_CMD_UNLOCK_START); + FLASH_IO8_WRITE(sector, FLASH_UNLOCK_ADDR2, AMD_CMD_UNLOCK_ACK); +} + +/* wait for toggle to stop and status mask to be met within microsecond timeout */ +static int hal_flash_status_wait(uint32_t sector, uint16_t mask, + uint32_t timeout_us) +{ + int ret = 0; + uint32_t timeout = 0; + uint16_t read1, read2; + + do { + /* detection of completion happens when reading status bits + * DQ6 and DQ2 stop toggling (0x44) */ + read1 = FLASH_IO8_READ(sector, 0); + if ((read1 & AMD_STATUS_TOGGLE) == 0) + read1 = FLASH_IO8_READ(sector, 0); + read2 = FLASH_IO8_READ(sector, 0); + if ((read2 & AMD_STATUS_TOGGLE) == 0) + read2 = FLASH_IO8_READ(sector, 0); + #ifdef DEBUG_FLASH + wolfBoot_printf("Wait toggle %x -> %x\n", read1, read2); + #endif + if (read1 == read2 && ((read1 & mask) == mask)) + break; + udelay(1); + } while (timeout++ < 
timeout_us); + if (timeout > timeout_us) { + ret = -1; /* timeout: the loop ran out of tries (counter ends at timeout_us + 1); a match on the final poll breaks with timeout == timeout_us and is success */ + } +#ifdef DEBUG_FLASH + wolfBoot_printf("Wait done (%d tries): %x -> %x\n", + timeout, read1, read2); +#endif + return ret; +} + int hal_flash_write(uint32_t address, const uint8_t *data, int len) { - (void)address; - (void)data; - (void)len; - /* TODO: Implement NOR flash write using IFC */ + uint32_t i, pos, sector, offset, xfer, nwords; + + /* adjust for flash base */ + if (address >= FLASH_BASE_ADDR) + address -= FLASH_BASE_ADDR; + +#ifdef DEBUG_FLASH + wolfBoot_printf("Flash Write: Ptr %p -> Addr 0x%x (len %d)\n", + data, address, len); +#endif + + pos = 0; + while (len > 0) { + /* determine sector address */ + sector = (address / FLASH_SECTOR_SIZE); + offset = address - (sector * FLASH_SECTOR_SIZE); + offset /= (FLASH_CFI_WIDTH/8); + xfer = len; + if (xfer > FLASH_PAGE_SIZE) + xfer = FLASH_PAGE_SIZE; + nwords = xfer / (FLASH_CFI_WIDTH/8); + + #ifdef DEBUG_FLASH + wolfBoot_printf("Flash Write: Sector %d, Offset %d, Len %d, Pos %d\n", + sector, offset, xfer, pos); + #endif + + hal_flash_unlock_sector(sector); + FLASH_IO8_WRITE(sector, offset, AMD_CMD_WRITE_TO_BUFFER); + #if FLASH_CFI_WIDTH == 16 + FLASH_IO16_WRITE(sector, offset, (nwords-1)); + #else + FLASH_IO8_WRITE(sector, offset, (nwords-1)); + #endif + + for (i=0; i= FLASH_BASE_ADDR) + address -= FLASH_BASE_ADDR; + + while (len > 0) { + /* determine sector address */ + sector = (address / FLASH_SECTOR_SIZE); + + #ifdef DEBUG_FLASH + wolfBoot_printf("Flash Erase: Sector %d, Addr 0x%x, Len %d\n", + sector, address, len); + #endif + + hal_flash_unlock_sector(sector); + FLASH_IO8_WRITE(sector, FLASH_UNLOCK_ADDR1, AMD_CMD_ERASE_START); + hal_flash_unlock_sector(sector); + FLASH_IO8_WRITE(sector, 0, AMD_CMD_ERASE_SECTOR); + /* block erase timeout = 50us - for additional sectors */ + /* Typical is 200ms (max 1100ms) */ + + /* poll for erase completion - max 1.1 sec */ + hal_flash_status_wait(sector, 0x4C, 1100*1000); + + address +=
FLASH_SECTOR_SIZE; + len -= FLASH_SECTOR_SIZE; + } return 0; } @@ -310,30 +465,30 @@ void hal_flash_unlock(void) { /* Disable all flash protection bits */ /* enter Non-volatile protection mode (C0h) */ - *((volatile uint16_t*)(FLASH_BASE + 0xAAA)) = 0xAAAA; - *((volatile uint16_t*)(FLASH_BASE + 0x554)) = 0x5555; - *((volatile uint16_t*)(FLASH_BASE + 0xAAA)) = 0xC0C0; + *((volatile uint16_t*)(FLASH_BASE_ADDR + 0xAAA)) = 0xAAAA; + *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x554)) = 0x5555; + *((volatile uint16_t*)(FLASH_BASE_ADDR + 0xAAA)) = 0xC0C0; /* clear all protection bit (80h/30h) */ - *((volatile uint16_t*)(FLASH_BASE + 0x000)) = 0x8080; - *((volatile uint16_t*)(FLASH_BASE + 0x000)) = 0x3030; + *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x000)) = 0x8080; + *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x000)) = 0x3030; /* exit Non-volatile protection mode (90h/00h) */ - *((volatile uint16_t*)(FLASH_BASE + 0x000)) = 0x9090; - *((volatile uint16_t*)(FLASH_BASE + 0x000)) = 0x0000; + *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x000)) = 0x9090; + *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x000)) = 0x0000; } void hal_flash_lock(void) { /* Enable all flash protection bits */ /* enter Non-volatile protection mode (C0h) */ - *((volatile uint16_t*)(FLASH_BASE + 0xAAA)) = 0xAAAA; - *((volatile uint16_t*)(FLASH_BASE + 0x554)) = 0x5555; - *((volatile uint16_t*)(FLASH_BASE + 0xAAA)) = 0xC0C0; + *((volatile uint16_t*)(FLASH_BASE_ADDR + 0xAAA)) = 0xAAAA; + *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x554)) = 0x5555; + *((volatile uint16_t*)(FLASH_BASE_ADDR + 0xAAA)) = 0xC0C0; /* set all protection bit (A0h/00h) */ - *((volatile uint16_t*)(FLASH_BASE + 0x000)) = 0xA0A0; - *((volatile uint16_t*)(FLASH_BASE + 0x000)) = 0x0000; + *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x000)) = 0xA0A0; + *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x000)) = 0x0000; /* exit Non-volatile protection mode (90h/00h) */ - *((volatile uint16_t*)(FLASH_BASE + 0x000)) = 0x9090; - *((volatile uint16_t*)(FLASH_BASE + 
0x000)) = 0x0000; + *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x000)) = 0x9090; + *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x000)) = 0x0000; } void hal_prepare_boot(void) diff --git a/hal/nxp_t2080.h b/hal/nxp_t2080.h index 7f5b492b12..13ebb63e98 100644 --- a/hal/nxp_t2080.h +++ b/hal/nxp_t2080.h @@ -127,14 +127,11 @@ enum ifc_amask_sizes { /* ---- NOR Flash ---- */ -#define FLASH_BASE 0xE8000000 - #define FLASH_BANK_SIZE (128*1024*1024) #define FLASH_PAGE_SIZE (1024) /* program buffer */ #define FLASH_SECTOR_SIZE (128*1024) #define FLASH_SECTORS (FLASH_BANK_SIZE / FLASH_SECTOR_SIZE) -#define FLASH_CFI_16BIT 0x02 /* word */ -#define FLASH_CFI_WIDTH FLASH_CFI_16BIT +#define FLASH_CFI_WIDTH 16 /* 8 or 16 */ #define FLASH_ERASE_TOUT 60000 /* Flash Erase Timeout (ms) */ #define FLASH_WRITE_TOUT 500 /* Flash Write Timeout (ms) */ From 26395b46b27c87686ee2662d112626abcc401ee6 Mon Sep 17 00:00:00 2001 From: David Garske Date: Mon, 9 Feb 2026 08:42:26 -0800 Subject: [PATCH 04/11] Process with drivers (flash, DTS, multi-core / spin table) --- arch.mk | 1 + config/examples/nxp-t2080.config | 12 +- hal/nxp_t2080.c | 337 ++++++++++++++++++++++++++++--- hal/nxp_t2080.h | 19 ++ 4 files changed, 333 insertions(+), 36 deletions(-) diff --git a/arch.mk b/arch.mk index 0e2446ccbf..3f7fa93457 100644 --- a/arch.mk +++ b/arch.mk @@ -1001,6 +1001,7 @@ ifeq ($(TARGET),nxp_t2080) LDFLAGS+=$(ARCH_FLAGS) LDFLAGS+=-Wl,--hash-style=both # generate both sysv and gnu symbol hash table LDFLAGS+=-Wl,--as-needed # remove weak functions not used + OBJS+=src/boot_ppc_mp.o # support for spin table UPDATE_OBJS:=src/update_ram.o OBJS+=src/fdt.o endif diff --git a/config/examples/nxp-t2080.config b/config/examples/nxp-t2080.config index 190b99bd30..f15f1acbf5 100644 --- a/config/examples/nxp-t2080.config +++ b/config/examples/nxp-t2080.config @@ -21,19 +21,19 @@ RAM_CODE?=0 DUALBANK_SWAP?=0 PKA?=1 WOLFTPM?=0 -WOLFBOOT_ORIGIN?=0xEFFF0000 +WOLFBOOT_ORIGIN?=0xEFFE0000 WOLFBOOT_PARTITION_SIZE?=0x20000 
WOLFBOOT_SECTOR_SIZE?=0x10000 -ARCH_FLASH_OFFSET?=0xEFFF0000 -BOOTLOADER_PARTITION_SIZE=0x10000 +ARCH_FLASH_OFFSET?=0xEFFE0000 +BOOTLOADER_PARTITION_SIZE=0x20000 -WOLFBOOT_PARTITION_BOOT_ADDRESS?=0xEFFD0000 +WOLFBOOT_PARTITION_BOOT_ADDRESS?=0xEFFC0000 WOLFBOOT_LOAD_ADDRESS?=0x19000 -WOLFBOOT_PARTITION_UPDATE_ADDRESS?=0xEFFB0000 +WOLFBOOT_PARTITION_UPDATE_ADDRESS?=0xEFFA0000 # Location of temporary sector used during updates -WOLFBOOT_PARTITION_SWAP_ADDRESS?=0xEFFA0000 +WOLFBOOT_PARTITION_SWAP_ADDRESS?=0xEFF90000 # DTS (Device Tree) WOLFBOOT_DTS_BOOT_ADDRESS?=0xE8040000 diff --git a/hal/nxp_t2080.c b/hal/nxp_t2080.c index e58555615d..4becdddcfe 100644 --- a/hal/nxp_t2080.c +++ b/hal/nxp_t2080.c @@ -28,7 +28,21 @@ /* generic shared NXP QorIQ driver code */ #include "nxp_ppc.c" +#define ENABLE_IFC +#define ENABLE_BUS_CLK_CALC + +#ifndef BUILD_LOADER_STAGE1 + #define ENABLE_MP /* multi-core support */ +#endif + +/* Forward declarations */ +static void hal_flash_unlock_sector(uint32_t sector); +#ifdef ENABLE_MP +static void hal_mp_init(void); +#endif + /* AMD CFI Commands (Spansion/Cypress) */ +#define FLASH_CMD_READ_ID 0x90 #define AMD_CMD_RESET 0xF0 #define AMD_CMD_WRITE 0xA0 #define AMD_CMD_ERASE_START 0x80 @@ -116,15 +130,76 @@ void law_init(void) set_law(3, 0xF, 0xF4000000, LAW_TRGT_BMAN, LAW_SIZE_32MB, 1); } -/* Delay helper using timebase */ -#define DELAY_US (SYS_CLK / 1000000) +/* Clock helpers */ +#ifdef ENABLE_BUS_CLK_CALC +static uint32_t hal_get_core_clk(void) +{ + /* compute core clock (system input * ratio) */ + uint32_t core_clk; + uint32_t core_ratio = get32(CLOCKING_PLLCNGSR(0)); /* see CGA_PLL1_RAT in RCW */ + /* shift by 1 and mask */ + core_ratio = ((core_ratio >> 1) & 0x3F); + core_clk = SYS_CLK * core_ratio; + return core_clk; +} +static uint32_t hal_get_plat_clk(void) +{ + /* compute platform clock (system input * ratio) */ + uint32_t plat_clk; + uint32_t plat_ratio = get32(CLOCKING_PLLPGSR); /* see SYS_PLL_RAT in RCW */ + /* shift by 1 and mask 
*/ + plat_ratio = ((plat_ratio >> 1) & 0x1F); + plat_clk = SYS_CLK * plat_ratio; + return plat_clk; +} +static uint32_t hal_get_bus_clk(void) +{ + /* compute bus clock (platform clock / 2) */ + uint32_t bus_clk = hal_get_plat_clk() / 2; + return bus_clk; +} +#else +#define hal_get_core_clk() (uint32_t)(SYS_CLK * 14) +#define hal_get_plat_clk() (uint32_t)(SYS_CLK * 4) +#define hal_get_bus_clk() (uint32_t)(hal_get_plat_clk() / 2) +#endif + +#define TIMEBASE_CLK_DIV 16 +#define TIMEBASE_HZ (hal_get_plat_clk() / TIMEBASE_CLK_DIV) +#define DELAY_US (TIMEBASE_HZ / 1000000) static void udelay(uint32_t delay_us) { wait_ticks(delay_us * DELAY_US); } +#if defined(ENABLE_IFC) && !defined(BUILD_LOADER_STAGE1) +static int hal_flash_getid(void) +{ + uint8_t manfid[4]; + + hal_flash_unlock_sector(0); + FLASH_IO8_WRITE(0, FLASH_UNLOCK_ADDR1, FLASH_CMD_READ_ID); + udelay(1000); + + manfid[0] = FLASH_IO8_READ(0, 0); /* Manufacturer Code */ + manfid[1] = FLASH_IO8_READ(0, 1); /* Device Code 1 */ + manfid[2] = FLASH_IO8_READ(0, 14); /* Device Code 2 */ + manfid[3] = FLASH_IO8_READ(0, 15); /* Device Code 3 */ + + /* Exit read info */ + FLASH_IO8_WRITE(0, 0, AMD_CMD_RESET); + udelay(1); + + wolfBoot_printf("Flash: Mfg 0x%x, Device Code 0x%x/0x%x/0x%x\n", + manfid[0], manfid[1], manfid[2], manfid[3]); + + return 0; +} +#endif /* ENABLE_IFC && !BUILD_LOADER_STAGE1 */ + static void hal_flash_init(void) { +#ifdef ENABLE_IFC /* IFC - NOR Flash */ /* LAW is also set in boot_ppc_start.S:flash_law */ set_law(1, FLASH_BASE_PHYS_HIGH, FLASH_BASE_ADDR, LAW_TRGT_IFC, LAW_SIZE_128MB, 1); @@ -142,13 +217,22 @@ static void hal_flash_init(void) IFC_FTIM2_NOR_TWP(28))); set32(IFC_FTIM3(0), 0); /* NOR IFC Definitions (CS0) */ - set32(IFC_CSPR_EXT(0), 0xF); + set32(IFC_CSPR_EXT(0), FLASH_BASE_PHYS_HIGH); set32(IFC_CSPR(0), (IFC_CSPR_PHYS_ADDR(FLASH_BASE_ADDR) | + #if FLASH_CFI_WIDTH == 16 IFC_CSPR_PORT_SIZE_16 | + #else + IFC_CSPR_PORT_SIZE_8 | + #endif IFC_CSPR_MSEL_NOR | IFC_CSPR_V)); set32(IFC_AMASK(0), 
IFC_AMASK_128MB); set32(IFC_CSOR(0), 0x0000000C); /* TRHZ (80 clocks for read enable high) */ + + #ifndef BUILD_LOADER_STAGE1 + hal_flash_getid(); + #endif +#endif /* ENABLE_IFC */ } static void hal_ddr_init(void) @@ -327,6 +411,10 @@ void hal_init(void) wolfBoot_printf("CPLD FW Rev: 0x%x\n", fw); #endif #endif /* ENABLE_CPLD */ + +#ifdef ENABLE_MP + hal_mp_init(); +#endif } static void hal_flash_unlock_sector(uint32_t sector) @@ -372,6 +460,7 @@ static int hal_flash_status_wait(uint32_t sector, uint16_t mask, int hal_flash_write(uint32_t address, const uint8_t *data, int len) { + int ret = 0; uint32_t i, pos, sector, offset, xfer, nwords; /* adjust for flash base */ @@ -420,16 +509,21 @@ int hal_flash_write(uint32_t address, const uint8_t *data, int len) /* Typical 410us */ /* poll for program completion - max 200ms */ - hal_flash_status_wait(sector, 0x44, 200*1000); + ret = hal_flash_status_wait(sector, 0x44, 200*1000); + if (ret != 0) { + wolfBoot_printf("Flash Write: Timeout at sector %d\n", sector); + break; + } address += xfer; len -= xfer; } - return 0; + return ret; } int hal_flash_erase(uint32_t address, int len) { + int ret = 0; uint32_t sector; /* adjust for flash base */ @@ -453,44 +547,141 @@ int hal_flash_erase(uint32_t address, int len) /* Typical is 200ms (max 1100ms) */ /* poll for erase completion - max 1.1 sec */ - hal_flash_status_wait(sector, 0x4C, 1100*1000); + ret = hal_flash_status_wait(sector, 0x4C, 1100*1000); + if (ret != 0) { + wolfBoot_printf("Flash Erase: Timeout at sector %d\n", sector); + break; + } address += FLASH_SECTOR_SIZE; len -= FLASH_SECTOR_SIZE; } - return 0; + return ret; } void hal_flash_unlock(void) { - /* Disable all flash protection bits */ - /* enter Non-volatile protection mode (C0h) */ - *((volatile uint16_t*)(FLASH_BASE_ADDR + 0xAAA)) = 0xAAAA; - *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x554)) = 0x5555; - *((volatile uint16_t*)(FLASH_BASE_ADDR + 0xAAA)) = 0xC0C0; - /* clear all protection bit (80h/30h) */ - 
*((volatile uint16_t*)(FLASH_BASE_ADDR + 0x000)) = 0x8080; - *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x000)) = 0x3030; - /* exit Non-volatile protection mode (90h/00h) */ - *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x000)) = 0x9090; - *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x000)) = 0x0000; + /* Per-sector unlock is done in hal_flash_write/erase before each operation. + * The previous non-volatile PPB protection mode (C0h) approach caused + * unnecessary wear on PPB cells since it was called on every boot. */ + hal_flash_unlock_sector(0); } void hal_flash_lock(void) { - /* Enable all flash protection bits */ - /* enter Non-volatile protection mode (C0h) */ - *((volatile uint16_t*)(FLASH_BASE_ADDR + 0xAAA)) = 0xAAAA; - *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x554)) = 0x5555; - *((volatile uint16_t*)(FLASH_BASE_ADDR + 0xAAA)) = 0xC0C0; - /* set all protection bit (A0h/00h) */ - *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x000)) = 0xA0A0; - *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x000)) = 0x0000; - /* exit Non-volatile protection mode (90h/00h) */ - *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x000)) = 0x9090; - *((volatile uint16_t*)(FLASH_BASE_ADDR + 0x000)) = 0x0000; + } +/* SMP Multi-Processor Driver */ +#ifdef ENABLE_MP + +/* from boot_ppc_mp.S */ +extern uint32_t _secondary_start_page; +extern uint32_t _second_half_boot_page; +extern uint32_t _spin_table; +extern uint32_t _spin_table_addr; +extern uint32_t _bootpg_addr; + +/* Startup additional cores with spin table and synchronize the timebase */ +static void hal_mp_up(uint32_t bootpg) +{ + uint32_t all_cores, active_cores, whoami; + int timeout = 50, i; + + whoami = get32(PIC_WHOAMI); /* Get current running core number */ + all_cores = ((1 << CPU_NUMCORES) - 1); /* mask of all cores */ + active_cores = (1 << whoami); /* current running cores */ + + wolfBoot_printf("MP: Starting cores (boot page %p, spin table %p)\n", + bootpg, (uint32_t)&_spin_table); + + /* Set the boot page translation register */ + 
set32(LCC_BSTRH, 0); + set32(LCC_BSTRL, bootpg); + set32(LCC_BSTAR, (LCC_BSTAR_EN | + LCC_BSTAR_LAWTRGT(LAW_TRGT_DDR_1) | + LAW_SIZE_4KB)); + (void)get32(LCC_BSTAR); /* read back to sync */ + + /* Enable time base on current core only */ + set32(RCPM_PCTBENR, (1 << whoami)); + + /* Release the CPU core(s) */ + set32(DCFG_BRR, all_cores); + __asm__ __volatile__("sync; isync; msync"); + + /* wait for other core(s) to start */ + while (timeout) { + for (i = 0; i < CPU_NUMCORES; i++) { + uint32_t* entry = (uint32_t*)( + (uint8_t*)&_spin_table + (i * ENTRY_SIZE) + ENTRY_ADDR_LOWER); + if (*entry) { + active_cores |= (1 << i); + } + } + if ((active_cores & all_cores) == all_cores) { + break; + } + + udelay(100); + timeout--; + } + + if (timeout == 0) { + wolfBoot_printf("MP: Timeout enabling additional cores!\n"); + } + + /* Disable all timebases */ + set32(RCPM_PCTBENR, 0); + + /* Reset our timebase */ + mtspr(SPRN_TBWU, 0); + mtspr(SPRN_TBWL, 0); + + /* Enable timebase for all cores */ + set32(RCPM_PCTBENR, all_cores); +} + +static void hal_mp_init(void) +{ + uint32_t *fixup = (uint32_t*)&_secondary_start_page; + uint32_t bootpg; + int i_tlb = 0; /* always 0 */ + size_t i; + const volatile uint32_t *s; + volatile uint32_t *d; + + /* Assign virtual boot page at end of DDR */ + bootpg = DDR_ADDRESS + DDR_SIZE - BOOT_ROM_SIZE; + + /* Store the boot page address for use by additional CPU cores */ + _bootpg_addr = (uint32_t)&_second_half_boot_page; + + /* Store location of spin table for other cores */ + _spin_table_addr = (uint32_t)&_spin_table; + + /* Flush bootpg before copying to invalidate any stale cache lines */ + flush_cache(bootpg, BOOT_ROM_SIZE); + + /* Map reset page to bootpg so we can copy code there */ + disable_tlb1(i_tlb); + set_tlb(1, i_tlb, BOOT_ROM_ADDR, bootpg, 0, /* tlb, esel, epn, rpn, urpn */ + (MAS3_SX | MAS3_SW | MAS3_SR), (MAS2_I | MAS2_G), /* perms, wimge */ + 0, BOOKE_PAGESZ_4K, 1); /* ts, tsize, iprot */ + + /* copy startup code to virtually 
mapped boot address */ + /* do not use memcpy due to compiler array bounds report (not valid) */ + s = (const uint32_t*)fixup; + d = (uint32_t*)BOOT_ROM_ADDR; + for (i = 0; i < BOOT_ROM_SIZE/4; i++) { + d[i] = s[i]; + } + + /* start core and wait for it to be enabled */ + hal_mp_up(bootpg); +} +#endif /* ENABLE_MP */ + void hal_prepare_boot(void) { @@ -501,4 +692,90 @@ void* hal_get_dts_address(void) { return (void*)WOLFBOOT_DTS_BOOT_ADDRESS; } -#endif + +int hal_dts_fixup(void* dts_addr) +{ +#ifndef BUILD_LOADER_STAGE1 + struct fdt_header *fdt = (struct fdt_header *)dts_addr; + int off; + uint32_t *reg; + + /* verify the FDT is valid */ + off = fdt_check_header(dts_addr); + if (off != 0) { + wolfBoot_printf("FDT: Invalid header! %d\n", off); + return off; + } + + /* display FDT information */ + wolfBoot_printf("FDT: Version %d, Size %d\n", + fdt_version(fdt), fdt_totalsize(fdt)); + + /* expand total size */ + fdt->totalsize += 2048; /* expand by 2KB */ + wolfBoot_printf("FDT: Expanded (2KB) to %d bytes\n", fdt->totalsize); + + /* fixup the memory region - single bank */ + off = fdt_find_devtype(fdt, -1, "memory"); + if (off != -FDT_ERR_NOTFOUND) { + /* build addr/size as 64-bit */ + uint8_t ranges[sizeof(uint64_t) * 2], *p = ranges; + *(uint64_t*)p = cpu_to_fdt64(DDR_ADDRESS); + p += sizeof(uint64_t); + *(uint64_t*)p = cpu_to_fdt64(DDR_SIZE); + p += sizeof(uint64_t); + wolfBoot_printf("FDT: Set memory, start=0x%x, size=0x%x\n", + DDR_ADDRESS, (uint32_t)DDR_SIZE); + fdt_setprop(fdt, off, "reg", ranges, (int)(p - ranges)); + } + + /* fixup CPU status and release address and enable method */ + off = fdt_find_devtype(fdt, -1, "cpu"); + while (off != -FDT_ERR_NOTFOUND) { + int core; + #ifdef ENABLE_MP + uint64_t core_spin_table; + #endif + + reg = (uint32_t*)fdt_getprop(fdt, off, "reg", NULL); + if (reg == NULL) + break; + core = (int)fdt32_to_cpu(*reg); + if (core >= CPU_NUMCORES) { + break; /* invalid core index */ + } + + #ifdef ENABLE_MP + /* calculate location of 
spin table for core */ + core_spin_table = (uint64_t)((uintptr_t)( + (uint8_t*)&_spin_table + (core * ENTRY_SIZE))); + + fdt_fixup_str(fdt, off, "cpu", "status", (core == 0) ? "okay" : "disabled"); + fdt_fixup_val64(fdt, off, "cpu", "cpu-release-addr", core_spin_table); + fdt_fixup_str(fdt, off, "cpu", "enable-method", "spin-table"); + #endif + fdt_fixup_val(fdt, off, "cpu", "timebase-frequency", TIMEBASE_HZ); + fdt_fixup_val(fdt, off, "cpu", "clock-frequency", hal_get_core_clk()); + fdt_fixup_val(fdt, off, "cpu", "bus-frequency", hal_get_plat_clk()); + + off = fdt_find_devtype(fdt, off, "cpu"); + } + + /* fixup the soc clock */ + off = fdt_find_devtype(fdt, -1, "soc"); + if (off != -FDT_ERR_NOTFOUND) { + fdt_fixup_val(fdt, off, "soc", "bus-frequency", hal_get_plat_clk()); + } + + /* fixup the serial clocks */ + off = fdt_find_devtype(fdt, -1, "serial"); + while (off != -FDT_ERR_NOTFOUND) { + fdt_fixup_val(fdt, off, "serial", "clock-frequency", hal_get_bus_clk()); + off = fdt_find_devtype(fdt, off, "serial"); + } + +#endif /* !BUILD_LOADER_STAGE1 */ + (void)dts_addr; + return 0; +} +#endif /* MMU */ diff --git a/hal/nxp_t2080.h b/hal/nxp_t2080.h index 13ebb63e98..0efab8404b 100644 --- a/hal/nxp_t2080.h +++ b/hal/nxp_t2080.h @@ -172,6 +172,14 @@ enum ifc_amask_sizes { #define SATA_ENBL ((volatile uint32_t*)0xB1003F4C) /* also saw 0xB4003F4C */ +/* ---- Boot Page Translation - T2080RM 4.4.9 ---- */ +#define LCC_BSTRH ((volatile uint32_t*)(CCSRBAR + 0x20)) /* Boot space translation register high */ +#define LCC_BSTRL ((volatile uint32_t*)(CCSRBAR + 0x24)) /* Boot space translation register low */ +#define LCC_BSTAR ((volatile uint32_t*)(CCSRBAR + 0x28)) /* Boot space translation attribute register */ +#define LCC_BSTAR_EN 0x80000000 +#define LCC_BSTAR_LAWTRGT(n) ((n) << 20) +#define LCC_BSTAR_LAWSZ(n) ((n) & 0x3F) + /* ---- DCFG (Device Configuration) - T2080RM 6.3 ---- */ #define DCFG_BASE (CCSRBAR + 0xE0000) #define DCFG_DCSR ((volatile uint32_t*)(DCFG_BASE + 
0x704)) /* Debug Configuration and Status */ @@ -180,11 +188,22 @@ enum ifc_amask_sizes { #define DCFG_DEVDISR3 ((volatile uint32_t*)(DCFG_BASE + 0x078)) /* Device Disable Control 3 */ #define DCFG_DEVDISR4 ((volatile uint32_t*)(DCFG_BASE + 0x07C)) /* Device Disable Control 4 */ #define DCFG_DEVDISR5 ((volatile uint32_t*)(DCFG_BASE + 0x080)) /* Device Disable Control 5 */ +#define DCFG_BRR ((volatile uint32_t*)(DCFG_BASE + 0xE4)) /* Boot Release Register */ /* ---- RCPM (Run Control and Power Management) - T2080RM 6.4 ---- */ #define RCPM_BASE (CCSRBAR + 0xE2000) #define RCPM_PCTBENR ((volatile uint32_t*)(RCPM_BASE + 0x1A0)) /* Physical Core Timebase Enable */ +/* ---- Clocking - T2080RM 5.3 ---- */ +#define CLOCKING_BASE (CCSRBAR + 0xE1000) +#define CLOCKING_CLKCCSR(n) ((volatile uint32_t*)(CLOCKING_BASE + 0x000UL + ((n) * 0x20))) +#define CLOCKING_PLLCNGSR(n) ((volatile uint32_t*)(CLOCKING_BASE + 0x800UL + ((n) * 0x20))) /* PLL cluster n general status */ +#define CLOCKING_PLLPGSR ((volatile uint32_t*)(CLOCKING_BASE + 0xC00UL)) /* Platform PLL general status */ + +/* ---- MPIC - T2080RM 24.3 ---- */ +#define PIC_BASE (CCSRBAR + 0x40000) +#define PIC_WHOAMI ((volatile uint32_t*)(PIC_BASE + 0x0090UL)) + /* ---- DDR (T2080RM 12.4) ---- */ /* NAII 68PPC2 - 8GB discrete DDR3 IM8G08D3EBDG-15E */ From a90d60f940d465abb4aa742194557a1c5c922992 Mon Sep 17 00:00:00 2001 From: David Garske Date: Thu, 12 Feb 2026 07:14:35 -0800 Subject: [PATCH 05/11] Port improvements --- .github/workflows/test-configs.yml | 6 --- arch.mk | 4 +- config/examples/nxp-t2080-68ppc2.config | 55 --------------------- config/examples/nxp-t2080.config | 36 ++++++++++++-- docs/Targets.md | 8 +-- hal/nxp_ppc.h | 2 +- hal/nxp_t2080.c | 65 ++++++++++--------------- hal/nxp_t2080.ld | 4 ++ include/wolfboot/wolfboot.h | 2 + src/boot_ppc.c | 37 +++++++++++--- 10 files changed, 100 insertions(+), 119 deletions(-) delete mode 100644 config/examples/nxp-t2080-68ppc2.config diff --git 
a/.github/workflows/test-configs.yml b/.github/workflows/test-configs.yml index a84cde0ccc..88784d60a8 100644 --- a/.github/workflows/test-configs.yml +++ b/.github/workflows/test-configs.yml @@ -182,12 +182,6 @@ jobs: arch: ppc config-file: ./config/examples/nxp-t1024.config - nxp_t2080_68ppc2_test: - uses: ./.github/workflows/test-build.yml - with: - arch: ppc - config-file: ./config/examples/nxp-t2080-68ppc2.config - nxp_t2080_test: uses: ./.github/workflows/test-build.yml with: diff --git a/arch.mk b/arch.mk index 3f7fa93457..ec47d9d703 100644 --- a/arch.mk +++ b/arch.mk @@ -627,7 +627,7 @@ endif ifeq ($(ARCH),PPC) CROSS_COMPILE?=powerpc-linux-gnu- LDFLAGS+=-Wl,--build-id=none - CFLAGS+=-DARCH_PPC -DFAST_MEMCPY + CFLAGS+=-DARCH_PPC -DFAST_MEMCPY -ffreestanding -fno-tree-loop-distribute-patterns ifeq ($(DEBUG_UART),0) CFLAGS+=-fno-builtin-printf @@ -635,7 +635,7 @@ ifeq ($(ARCH),PPC) # Target-specific CPU flags ifeq ($(TARGET),nxp_t2080) - CFLAGS+=-mcpu=e6500 -mno-altivec + CFLAGS+=-mcpu=e6500 -mno-altivec -mbss-plt else ifeq ($(TARGET),nxp_t1024) CFLAGS+=-mcpu=e5500 endif diff --git a/config/examples/nxp-t2080-68ppc2.config b/config/examples/nxp-t2080-68ppc2.config deleted file mode 100644 index c4e2cbb01d..0000000000 --- a/config/examples/nxp-t2080-68ppc2.config +++ /dev/null @@ -1,55 +0,0 @@ -# NAII 68PPC2 NXP T2080 wolfBoot Configuration Template - -ARCH=PPC -TARGET=nxp_t2080 -SIGN?=ECC384 -HASH?=SHA384 -IMAGE_HEADER_SIZE?=512 -DEBUG?=0 -DEBUG_UART?=1 -VTOR?=1 -CORTEX_M0?=0 -NO_ASM?=0 -EXT_FLASH?=0 -SPI_FLASH?=0 -NO_XIP?=0 -UART_FLASH?=0 -ALLOW_DOWNGRADE?=0 -NVM_FLASH_WRITEONCE?=0 -WOLFBOOT_VERSION?=0 -NO_MPU?=0 -SPMATH?=0 -SPMATHALL?=1 -RAM_CODE?=1 -DUALBANK_SWAP?=0 -WOLFTPM?=0 - -# NOR Base Address -ARCH_FLASH_OFFSET?=0xE8000000 - -# Flash Sector Size -WOLFBOOT_SECTOR_SIZE=0x10000 - -# wolfBoot start address -WOLFBOOT_ORIGIN=0xEFF40000 -# wolfBoot partition size (custom) -BOOTLOADER_PARTITION_SIZE=0x20000 - -# Application Partition Size 
-WOLFBOOT_PARTITION_SIZE?=0xA00000 -# Location in Flash for Application Partition -WOLFBOOT_PARTITION_BOOT_ADDRESS?=0xE8080000 -# Load Partition to RAM Address -WOLFBOOT_LOAD_ADDRESS?=0x19000 - -# Location in Flash for Update Partition -WOLFBOOT_PARTITION_UPDATE_ADDRESS?=0xE8A80000 - -# Location of temporary sector used during updates -WOLFBOOT_PARTITION_SWAP_ADDRESS?=0xE8060000 - -# DTS (Device Tree) -WOLFBOOT_DTS_BOOT_ADDRESS?=0xE8040000 -WOLFBOOT_DTS_UPDATE_ADDRESS?=0xE8050000 -# DTS Load to RAM Address -WOLFBOOT_LOAD_DTS_ADDRESS?=0x40000 diff --git a/config/examples/nxp-t2080.config b/config/examples/nxp-t2080.config index f15f1acbf5..2e82d00412 100644 --- a/config/examples/nxp-t2080.config +++ b/config/examples/nxp-t2080.config @@ -1,8 +1,16 @@ +# NXP T2080 wolfBoot Configuration Template +# +# Stock (default): Compact layout, NOR base 0xEFFE0000 +# NAII 68PPC2 (alternate): Larger app partition, NOR base 0xE8000000 +# Uncomment the "# NAII 68PPC2:" lines and comment the stock lines to use. 
+ ARCH=PPC TARGET=nxp_t2080 SIGN?=ECC384 HASH?=SHA384 +IMAGE_HEADER_SIZE?=512 DEBUG?=0 +DEBUG_SYMBOLS?=1 DEBUG_UART?=1 VTOR?=1 CORTEX_M0?=0 @@ -17,25 +25,43 @@ WOLFBOOT_VERSION?=0 NO_MPU?=0 SPMATH?=0 SPMATHALL?=1 -RAM_CODE?=0 +RAM_CODE?=1 DUALBANK_SWAP?=0 -PKA?=1 WOLFTPM?=0 -WOLFBOOT_ORIGIN?=0xEFFE0000 -WOLFBOOT_PARTITION_SIZE?=0x20000 -WOLFBOOT_SECTOR_SIZE?=0x10000 +# NOR Base Address ARCH_FLASH_OFFSET?=0xEFFE0000 +# NAII 68PPC2: ARCH_FLASH_OFFSET?=0xE8000000 + +# Flash Sector Size +WOLFBOOT_SECTOR_SIZE?=0x10000 + +# wolfBoot start address +WOLFBOOT_ORIGIN?=0xEFFE0000 +# NAII 68PPC2: WOLFBOOT_ORIGIN?=0xEFF40000 +# wolfBoot partition size (custom) BOOTLOADER_PARTITION_SIZE=0x20000 +# Application Partition Size +WOLFBOOT_PARTITION_SIZE?=0x20000 +# NAII 68PPC2: WOLFBOOT_PARTITION_SIZE?=0xA00000 +# Location in Flash for Application Partition WOLFBOOT_PARTITION_BOOT_ADDRESS?=0xEFFC0000 +# NAII 68PPC2: WOLFBOOT_PARTITION_BOOT_ADDRESS?=0xE8080000 +# Load Partition to RAM Address WOLFBOOT_LOAD_ADDRESS?=0x19000 + +# Location in Flash for Update Partition WOLFBOOT_PARTITION_UPDATE_ADDRESS?=0xEFFA0000 +# NAII 68PPC2: WOLFBOOT_PARTITION_UPDATE_ADDRESS?=0xE8A80000 # Location of temporary sector used during updates WOLFBOOT_PARTITION_SWAP_ADDRESS?=0xEFF90000 +# NAII 68PPC2: WOLFBOOT_PARTITION_SWAP_ADDRESS?=0xE8060000 # DTS (Device Tree) WOLFBOOT_DTS_BOOT_ADDRESS?=0xE8040000 WOLFBOOT_DTS_UPDATE_ADDRESS?=0xE8050000 +# DTS Load to RAM Address WOLFBOOT_LOAD_DTS_ADDRESS?=0x200000 +# NAII 68PPC2: WOLFBOOT_LOAD_DTS_ADDRESS?=0x40000 \ No newline at end of file diff --git a/docs/Targets.md b/docs/Targets.md index 0bfa40e4dd..356902ff1a 100644 --- a/docs/Targets.md +++ b/docs/Targets.md @@ -3061,9 +3061,8 @@ Flash factory_custom.bin to NOR base 0xEC00_0000 The NXP QorIQ T2080 is a PPC e6500 based processor (four cores). Support has been tested with the NAII 68PPC2. 
-Example configurations for this target are provided in: -* NXP T2080: [/config/examples/nxp-t2080.config](/config/examples/nxp-t2080.config). -* NAII 68PPC2: [/config/examples/nxp-t2080-68ppc2.config](/config/examples/nxp-t2080-68ppc2.config). +Example configuration: [/config/examples/nxp-t2080.config](/config/examples/nxp-t2080.config). +Stock layout is default; for NAII 68PPC2, uncomment the "# NAII 68PPC2:" lines and comment the stock lines. ### Design NXP T2080 PPC @@ -3080,9 +3079,10 @@ RM 4.3.3 Boot Space Translation By default wolfBoot will use `powerpc-linux-gnu-` cross-compiler prefix. These tools can be installed with the Debian package `gcc-powerpc-linux-gnu` (`sudo apt install gcc-powerpc-linux-gnu`). The `make` creates a `factory.bin` image that can be programmed at `0xE8080000` +(For NAII 68PPC2, first edit `nxp-t2080.config` to uncomment the NAII 68PPC2 lines.) ``` -cp ./config/examples/nxp-t2080-68ppc2.config .config +cp ./config/examples/nxp-t2080.config .config make clean make keytools make diff --git a/hal/nxp_ppc.h b/hal/nxp_ppc.h index a0d7c21b53..c392579f0f 100644 --- a/hal/nxp_ppc.h +++ b/hal/nxp_ppc.h @@ -133,7 +133,7 @@ #define ENABLE_DDR #ifndef DDR_SIZE - #define DDR_SIZE (8192UL * 1024UL * 1024UL) + #define DDR_SIZE (8192ULL * 1024ULL * 1024ULL) #endif #define FLASH_BASE_ADDR 0xE8000000UL diff --git a/hal/nxp_t2080.c b/hal/nxp_t2080.c index 4becdddcfe..839c2b1466 100644 --- a/hal/nxp_t2080.c +++ b/hal/nxp_t2080.c @@ -200,38 +200,24 @@ static int hal_flash_getid(void) static void hal_flash_init(void) { #ifdef ENABLE_IFC - /* IFC - NOR Flash */ - /* LAW is also set in boot_ppc_start.S:flash_law */ - set_law(1, FLASH_BASE_PHYS_HIGH, FLASH_BASE_ADDR, LAW_TRGT_IFC, LAW_SIZE_128MB, 1); - - /* NOR IFC Flash Timing Parameters */ - set32(IFC_FTIM0(0), (IFC_FTIM0_NOR_TACSE(4) | - IFC_FTIM0_NOR_TEADC(5) | - IFC_FTIM0_NOR_TEAHC(5))); - set32(IFC_FTIM1(0), (IFC_FTIM1_NOR_TACO(53) | - IFC_FTIM1_NOR_TRAD(26) | - IFC_FTIM1_NOR_TSEQ(19))); - 
set32(IFC_FTIM2(0), (IFC_FTIM2_NOR_TCS(4) | - IFC_FTIM2_NOR_TCH(4) | - IFC_FTIM2_NOR_TWPH(14) | - IFC_FTIM2_NOR_TWP(28))); - set32(IFC_FTIM3(0), 0); - /* NOR IFC Definitions (CS0) */ - set32(IFC_CSPR_EXT(0), FLASH_BASE_PHYS_HIGH); - set32(IFC_CSPR(0), (IFC_CSPR_PHYS_ADDR(FLASH_BASE_ADDR) | - #if FLASH_CFI_WIDTH == 16 - IFC_CSPR_PORT_SIZE_16 | - #else - IFC_CSPR_PORT_SIZE_8 | - #endif - IFC_CSPR_MSEL_NOR | - IFC_CSPR_V)); - set32(IFC_AMASK(0), IFC_AMASK_128MB); - set32(IFC_CSOR(0), 0x0000000C); /* TRHZ (80 clocks for read enable high) */ + /* IFC CS0 - NOR Flash + * Do NOT reprogram IFC CS0 (CSPR, AMASK, CSOR, FTIM) while executing + * from flash (XIP) with cache-inhibited TLB (MAS2_I|MAS2_G). The boot + * ROM already configured CS0 correctly. Reprogramming CSPR while XIP + * can cause instruction fetch failures because there is no cache to + * serve fetches during the chip-select decode transition. + * + * U-Boot avoids this by using MAS2_W|MAS2_G (write-through, cached) + * during XIP, only switching to MAS2_I|MAS2_G after relocating to RAM. + * + * The LAW is also already set in boot_ppc_start.S:flash_law. + */ - #ifndef BUILD_LOADER_STAGE1 - hal_flash_getid(); - #endif + /* Note: hal_flash_getid() is disabled because AMD Autoselect mode + * affects the entire flash bank. Since wolfBoot runs XIP from the same + * bank (CS0), entering Autoselect mode crashes instruction fetch. + * Flash write/erase operations will need RAMFUNCTION support. + * TODO: Implement RAMFUNCTION for flash operations on T2080. 
*/ #endif /* ENABLE_IFC */ } @@ -578,7 +564,7 @@ void hal_flash_lock(void) /* from boot_ppc_mp.S */ extern uint32_t _secondary_start_page; extern uint32_t _second_half_boot_page; -extern uint32_t _spin_table; +extern uint32_t _spin_table[]; extern uint32_t _spin_table_addr; extern uint32_t _bootpg_addr; @@ -593,7 +579,7 @@ static void hal_mp_up(uint32_t bootpg) active_cores = (1 << whoami); /* current running cores */ wolfBoot_printf("MP: Starting cores (boot page %p, spin table %p)\n", - bootpg, (uint32_t)&_spin_table); + bootpg, (uint32_t)_spin_table); /* Set the boot page translation register */ set32(LCC_BSTRH, 0); @@ -614,7 +600,7 @@ static void hal_mp_up(uint32_t bootpg) while (timeout) { for (i = 0; i < CPU_NUMCORES; i++) { uint32_t* entry = (uint32_t*)( - (uint8_t*)&_spin_table + (i * ENTRY_SIZE) + ENTRY_ADDR_LOWER); + (uint8_t*)_spin_table + (i * ENTRY_SIZE) + ENTRY_ADDR_LOWER); if (*entry) { active_cores |= (1 << i); } @@ -651,14 +637,17 @@ static void hal_mp_init(void) const volatile uint32_t *s; volatile uint32_t *d; - /* Assign virtual boot page at end of DDR */ - bootpg = DDR_ADDRESS + DDR_SIZE - BOOT_ROM_SIZE; + /* Assign virtual boot page at end of LAW-mapped DDR region. + * DDR LAW maps 2GB (LAW_SIZE_2GB) starting at DDR_ADDRESS. + * DDR_SIZE may exceed 32-bit range (e.g. 8GB), so use the LAW-mapped + * size to ensure bootpg fits in 32 bits and is accessible. 
*/ + bootpg = DDR_ADDRESS + 0x80000000UL - BOOT_ROM_SIZE; /* Store the boot page address for use by additional CPU cores */ _bootpg_addr = (uint32_t)&_second_half_boot_page; /* Store location of spin table for other cores */ - _spin_table_addr = (uint32_t)&_spin_table; + _spin_table_addr = (uint32_t)_spin_table; /* Flush bootpg before copying to invalidate any stale cache lines */ flush_cache(bootpg, BOOT_ROM_SIZE); @@ -748,7 +737,7 @@ int hal_dts_fixup(void* dts_addr) #ifdef ENABLE_MP /* calculate location of spin table for core */ core_spin_table = (uint64_t)((uintptr_t)( - (uint8_t*)&_spin_table + (core * ENTRY_SIZE))); + (uint8_t*)_spin_table + (core * ENTRY_SIZE))); fdt_fixup_str(fdt, off, "cpu", "status", (core == 0) ? "okay" : "disabled"); fdt_fixup_val64(fdt, off, "cpu", "cpu-release-addr", core_spin_table); diff --git a/hal/nxp_t2080.ld b/hal/nxp_t2080.ld index 65940daee8..e777d5c49b 100644 --- a/hal/nxp_t2080.ld +++ b/hal/nxp_t2080.ld @@ -63,6 +63,10 @@ SECTIONS { _start_data = .; KEEP(*(.data*)) + *(.got*) + *(.got2*) + *(.plt*) + *(.dynamic) . = ALIGN(4); KEEP(*(.ramcode)) . 
= ALIGN(4); diff --git a/include/wolfboot/wolfboot.h b/include/wolfboot/wolfboot.h index f380835910..9811c3b09e 100644 --- a/include/wolfboot/wolfboot.h +++ b/include/wolfboot/wolfboot.h @@ -47,6 +47,8 @@ extern "C" { # if defined(__WOLFBOOT) && defined(RAM_CODE) # if defined(ARCH_ARM) # define RAMFUNCTION __attribute__((used,section(".ramcode"),long_call)) +# elif defined(ARCH_PPC) +# define RAMFUNCTION __attribute__((used,section(".ramcode"),longcall)) # else # define RAMFUNCTION __attribute__((used,section(".ramcode"))) # endif diff --git a/src/boot_ppc.c b/src/boot_ppc.c index fe86cad08a..7e322f25d5 100644 --- a/src/boot_ppc.c +++ b/src/boot_ppc.c @@ -113,25 +113,46 @@ int WEAKFUNCTION hal_dts_fixup(void* dts_addr) } #endif +/* forward declaration */ +#ifndef BUILD_LOADER_STAGE1 +void flush_cache(uint32_t start_addr, uint32_t size); +#endif + void boot_entry_C(void) { - register unsigned int *dst, *src, *end; + volatile unsigned int *dst; + volatile const unsigned int *src; + volatile unsigned int *end; hal_early_init(); - /* Copy the .data section from flash to RAM */ - src = (unsigned int*)&_stored_data; - dst = (unsigned int*)&_start_data; - end = (unsigned int*)&_end_data; + /* Copy the .data section from flash to RAM. + * Use volatile to prevent the compiler from transforming this loop + * into a memcpy() call — memcpy is RAMFUNCTION in .data and hasn't + * been copied to DDR yet at this point. */ + src = (volatile const unsigned int*)&_stored_data; + dst = (volatile unsigned int*)&_start_data; + end = (volatile unsigned int*)&_end_data; while (dst < end) { *dst = *src; dst++; src++; } - /* Initialize the BSS section to 0 */ - dst = (unsigned int*)&__bss_start__; - end = (unsigned int*)&__bss_end__; +#ifndef BUILD_LOADER_STAGE1 + /* Flush D-cache and invalidate I-cache for .data region. + * The .ramcode section (RAMFUNCTION code like memcpy) is within .data + * and was just copied to DDR through D-cache. 
Without this flush, the + * I-cache will fetch stale/uninitialized DDR content when calling + * RAMFUNCTION code, causing instruction fetch failures. + * PowerPC I/D caches are not coherent — explicit dcbst+icbi required. */ + flush_cache((uint32_t)&_start_data, + (uint32_t)&_end_data - (uint32_t)&_start_data); +#endif + + /* Initialize the BSS section to 0 (volatile prevents memset transform) */ + dst = (volatile unsigned int*)&__bss_start__; + end = (volatile unsigned int*)&__bss_end__; while (dst < end) { *dst = 0U; dst++; From ec28be7e49b1ebeee5e397085dbf674c216b3f51 Mon Sep 17 00:00:00 2001 From: David Garske Date: Tue, 17 Feb 2026 16:20:38 -0800 Subject: [PATCH 06/11] Progress with T2080 port refresh --- hal/nxp_ppc.h | 24 +++++---- hal/nxp_t2080.c | 6 +-- hal/nxp_t2080.ld | 27 +++++++--- src/boot_ppc.c | 38 ++++++++++---- src/boot_ppc_start.S | 120 +++++++++++++++++++++++++++++++++---------- 5 files changed, 158 insertions(+), 57 deletions(-) diff --git a/hal/nxp_ppc.h b/hal/nxp_ppc.h index c392579f0f..6ee8d11de3 100644 --- a/hal/nxp_ppc.h +++ b/hal/nxp_ppc.h @@ -118,16 +118,17 @@ #define ENABLE_L1_CACHE #define ENABLE_L2_CACHE - #define L2SRAM_ADDR (0xF8F00000UL) /* CPC as SRAM (1MB) */ - #define L2SRAM_SIZE (1024UL * 1024UL) + /* T2080 CPC SRAM config - 512KB per T2080RM */ + #define L2SRAM_ADDR (0xF8F80000UL) /* CPC as SRAM (512KB) */ + #define L2SRAM_SIZE (512UL * 1024UL) #define INITIAL_SRAM_ADDR L2SRAM_ADDR /* CPC SRAM transactions traverse the CoreNet interconnect, which - * requires a LAW to route them. LAW_TRGT_DDR_1 is used as a routing - * target; the CPC intercepts the transaction before it reaches DDR. */ - #define INITIAL_SRAM_LAW_SZ LAW_SIZE_1MB + * requires a LAW to route them. LAW_TRGT_DDR_1 (0x10) is the CPC + * target per T2080RM Table 2-2 (Target ID Assignments). 
*/ + #define INITIAL_SRAM_LAW_SZ LAW_SIZE_512KB #define INITIAL_SRAM_LAW_TRGT LAW_TRGT_DDR_1 - #define INITIAL_SRAM_BOOKE_SZ BOOKE_PAGESZ_1M + #define INITIAL_SRAM_BOOKE_SZ BOOKE_PAGESZ_512K #define ENABLE_INTERRUPTS @@ -303,6 +304,7 @@ #ifdef CORE_E6500 /* T2080: 2MB CPC, 16 ways, 128KB per way */ #define CPCSRCR0_SRAMSZ_256 (0x1 << 1) /* ways 14-15, 256KB */ + #define CPCSRCR0_SRAMSZ_512 (0x2 << 1) /* ways 12-15, 512KB */ #define CPCSRCR0_SRAMSZ_1024 (0x3 << 1) /* ways 8-15, 1MB */ #define CPCSRCR0_SRAMSZ_2048 (0x4 << 1) /* ways 0-15, 2MB */ #else /* CORE E5500 */ @@ -691,12 +693,12 @@ extern void dcache_disable(void); #else /* Assembly version */ #ifdef CORE_E6500 -/* e6500 has 64-bit MAS registers - must clear upper 32 bits. - * Using lis would sign-extend values with bit 15 set (e.g., 0xC000xxxx). - * Use li 0; oris; ori pattern for all MAS registers. */ +/* e6500 has 64-bit MAS registers. On 64-bit PPC, lis sign-extends to 64 bits. + * Any MAS value with bit 31 set (MAS1=0xC..., MAS2/MAS3 high addresses) gets + * upper 32 bits = 0xFFFFFFFF. Hardware may require reserved upper bits = 0. + * Use "li 0; oris; ori" pattern for MAS1, MAS2, MAS3 to avoid sign-extension. 
*/ #define set_tlb(tlb, esel, epn, rpn, urpn, perms, winge, ts, tsize, iprot, reg) \ - li reg, 0; \ - oris reg, reg, BOOKE_MAS0(tlb, esel, 0)@h; \ + lis reg, BOOKE_MAS0(tlb, esel, 0)@h; \ ori reg, reg, BOOKE_MAS0(tlb, esel, 0)@l; \ mtspr MAS0, reg;\ li reg, 0; \ diff --git a/hal/nxp_t2080.c b/hal/nxp_t2080.c index 839c2b1466..caee6ae51b 100644 --- a/hal/nxp_t2080.c +++ b/hal/nxp_t2080.c @@ -221,7 +221,7 @@ static void hal_flash_init(void) #endif /* ENABLE_IFC */ } -static void hal_ddr_init(void) +void hal_ddr_init(void) { #ifdef ENABLE_DDR uint32_t reg; @@ -358,8 +358,8 @@ static void hal_cpld_init(void) set32(IFC_AMASK(3), IFC_AMASK_64KB); set32(IFC_CSOR(3), 0); - /* IFC - CPLD */ - set_law(2, CPLD_BASE_PHYS_HIGH, CPLD_BASE, + /* IFC - CPLD (use LAW 5; LAW 2 is used for CPC SRAM) */ + set_law(5, CPLD_BASE_PHYS_HIGH, CPLD_BASE, LAW_TRGT_IFC, LAW_SIZE_4KB, 1); /* CPLD - TBL=1, Entry 17 */ diff --git a/hal/nxp_t2080.ld b/hal/nxp_t2080.ld index e777d5c49b..2294421624 100644 --- a/hal/nxp_t2080.ld +++ b/hal/nxp_t2080.ld @@ -13,8 +13,9 @@ MEMORY { FLASH (rx) : ORIGIN = @WOLFBOOT_ORIGIN@, LENGTH = @BOOTLOADER_PARTITION_SIZE@ - /* CPC as SRAM - 1MB */ - RAM (rwx) : ORIGIN = 0xF8F00000, LENGTH = 0x100000 + /* CPC as SRAM - 512KB (T2080 supports up to 2MB, using 512KB) + * Layout: .ramcode at bottom, stack grows down from top */ + RAM (rwx) : ORIGIN = 0xF8F80000, LENGTH = 0x80000 /* DDR - 2GB */ DRAM (rwx) : ORIGIN = 0x00000000, LENGTH = 0x7FFFFFFF @@ -57,7 +58,21 @@ SECTIONS .gnu.hash : { *(.gnu.hash) } .rela.dyn : { *(.rela.dyn) } - _stored_data = .; + /* Store flash location for .ramcode copy */ + _stored_ramcode = .; + + /* RAMFUNCTION code in CPC SRAM - copied before DDR is used + * This ensures memcpy/memmove are available early */ + .ramcode : AT (_stored_ramcode) + { + _start_ramcode = .; + KEEP(*(.ramcode)) + . 
= ALIGN(4); + _end_ramcode = .; + } > RAM + + /* Calculate where .data starts in flash (after .ramcode) */ + _stored_data = _stored_ramcode + (_end_ramcode - _start_ramcode); .data : AT (_stored_data) { @@ -68,8 +83,6 @@ SECTIONS *(.plt*) *(.dynamic) . = ALIGN(4); - KEEP(*(.ramcode)) - . = ALIGN(4); _end_data = .; } > DRAM @@ -88,5 +101,7 @@ SECTIONS } -PROVIDE(_start_heap = ORIGIN(RAM)); +/* Heap starts after .ramcode in CPC SRAM */ +PROVIDE(_start_heap = _end_ramcode); +/* Stack at top of CPC SRAM, grows down */ PROVIDE(_end_stack = ORIGIN(RAM) + (LENGTH(RAM)) ); diff --git a/src/boot_ppc.c b/src/boot_ppc.c index 7e322f25d5..810421e1c1 100644 --- a/src/boot_ppc.c +++ b/src/boot_ppc.c @@ -31,6 +31,10 @@ extern unsigned int __bss_end__; extern unsigned int _stored_data; extern unsigned int _start_data; extern unsigned int _end_data; +/* .ramcode section (RAMFUNCTION) - may be in separate memory region */ +extern unsigned int _stored_ramcode; +extern unsigned int _start_ramcode; +extern unsigned int _end_ramcode; extern void main(void); extern void hal_early_init(void); @@ -124,12 +128,33 @@ void boot_entry_C(void) volatile const unsigned int *src; volatile unsigned int *end; + /* Copy .ramcode section FIRST - to CPC SRAM which is already available. + * This makes RAMFUNCTION code (memcpy, memmove) available before DDR. + * Use volatile to prevent compiler from transforming to memcpy call. */ + src = (volatile const unsigned int*)&_stored_ramcode; + dst = (volatile unsigned int*)&_start_ramcode; + end = (volatile unsigned int*)&_end_ramcode; + while (dst < end) { + *dst = *src; + dst++; + src++; + } + +#ifndef BUILD_LOADER_STAGE1 + /* Flush D-cache and invalidate I-cache for .ramcode in CPC SRAM. + * PowerPC I/D caches are not coherent — explicit dcbst+icbi required. 
*/ + if ((uint32_t)&_end_ramcode > (uint32_t)&_start_ramcode) { + flush_cache((uint32_t)&_start_ramcode, + (uint32_t)&_end_ramcode - (uint32_t)&_start_ramcode); + } +#endif + + /* Now initialize DDR and other hardware */ hal_early_init(); - /* Copy the .data section from flash to RAM. + /* Copy the .data section from flash to DDR. * Use volatile to prevent the compiler from transforming this loop - * into a memcpy() call — memcpy is RAMFUNCTION in .data and hasn't - * been copied to DDR yet at this point. */ + * into a memcpy() call. */ src = (volatile const unsigned int*)&_stored_data; dst = (volatile unsigned int*)&_start_data; end = (volatile unsigned int*)&_end_data; @@ -140,12 +165,7 @@ void boot_entry_C(void) } #ifndef BUILD_LOADER_STAGE1 - /* Flush D-cache and invalidate I-cache for .data region. - * The .ramcode section (RAMFUNCTION code like memcpy) is within .data - * and was just copied to DDR through D-cache. Without this flush, the - * I-cache will fetch stale/uninitialized DDR content when calling - * RAMFUNCTION code, causing instruction fetch failures. - * PowerPC I/D caches are not coherent — explicit dcbst+icbi required. */ + /* Flush D-cache and invalidate I-cache for .data region in DDR. 
*/ flush_cache((uint32_t)&_start_data, (uint32_t)&_end_data - (uint32_t)&_start_data); #endif diff --git a/src/boot_ppc_start.S b/src/boot_ppc_start.S index 645876e78c..e862062459 100644 --- a/src/boot_ppc_start.S +++ b/src/boot_ppc_start.S @@ -560,58 +560,91 @@ flash_tlb: #endif #endif /* ENABLE_DDR */ +/* ========================================================================= + * CPC SRAM Initialization + * Order: 1) CPC invalidate, 2) CPCSRCR config, 3) LAW, 4) TLB, 5) CPC enable + * Note: TLB must be created BEFORE CPC enable (original working sequence) + * ========================================================================= */ +#if defined(ENABLE_L2_CACHE) && defined(L2SRAM_ADDR) && (defined(CORE_E5500) || defined(CORE_E6500)) +cpc_setup_sram: + /* T2080RM: 8.4.2.2 - CPC initialization sequence: + * Step 1: Flash invalidate CPC and clear locks (CPCFI | CPCLFC) + * Step 2: Poll until invalidate completes + * Step 3: Configure SRAM control registers (CPCSRCR1, CPCSRCR0) + * Step 4: Configure LAW for SRAM routing (done after this block) + * Step 5: Enable CPC with parity (CPCE | CPCPE) + * Step 6: Create TLB for SRAM access + * The LAW (DDR_1) provides CoreNet routing; CPC intercepts before DDR. */ + + /* R1 = CPC base - preserve across LAW setup */ + LOAD_ADDR32(r1, CPC_BASE) + + /* Step 1: Flash invalidate CPC and clear all locks */ + lis r0, (CPCCSR0_CPCFI | CPCCSR0_CPCLFC)@h + ori r0, r0, (CPCCSR0_CPCFI | CPCCSR0_CPCLFC)@l + stw r0, CPCCSR0(r1) + + /* Step 2: Poll until CPCFI and CPCLFC clear */ +cpc_poll_invalidate: + lwz r2, CPCCSR0(r1) + and. 
r2, r2, r0 + bne cpc_poll_invalidate + isync + + /* Step 3: Configure CPC SRAM control registers */ + li r0, 0 + stw r0, CPCSRCR1(r1) /* SRAM high address = 0 */ + /* SRAM low address - use LOAD_ADDR32 on e6500 to avoid sign extension */ + LOAD_ADDR32(r0, L2SRAM_ADDR) + /* Enable SRAM and set size (must match L2SRAM_SIZE = 512KB) */ + ori r0, r0, (CPCSRCR0_SRAMSZ_512 | CPCSRCR0_SRAMEN) + stw r0, CPCSRCR0(r1) + mbar + isync +#endif /* ENABLE_L2_CACHE && L2SRAM_ADDR */ + +/* Step 3: Configure LAW for SRAM */ #ifdef INITIAL_SRAM_ADDR #ifndef INITIAL_SRAM_NO_LAW init_sram_law: - /* Intial SRAM LAW 2 */ + /* CPC SRAM uses LAW 2 - DO NOT reuse this LAW index elsewhere! + * The stack resides in CPC SRAM; overwriting this LAW causes crashes. */ #define INITIAL_SRAM_LAW (LAWAR_ENABLE | \ LAWAR_TRGT_ID(INITIAL_SRAM_LAW_TRGT) | \ INITIAL_SRAM_LAW_SZ) LOAD_ADDR32(r9, CCSRBAR + LAWBAR_BASE(2)) li r0, 0 /* UPPER=0 */ /* Use LOAD_ADDR32 on e6500 to avoid sign-extension for addresses >= 0x80000000 */ - LOAD_ADDR32(r1, INITIAL_SRAM_ADDR) + LOAD_ADDR32(r3, INITIAL_SRAM_ADDR) LOAD_ADDR32(r2, INITIAL_SRAM_LAW) stw r0, 0(r9) /* LAWBARH */ - stw r1, 4(r9) /* LAWBARL */ + stw r3, 4(r9) /* LAWBARL */ sync stw r2, 8(r9) /* LAWAR */ /* read back LAWAR (per 2.3.2 Configuring Local Access Windows) */ lwz r2, 8(r9) isync #endif /* !INITIAL_SRAM_NO_LAW */ +#endif /* INITIAL_SRAM_ADDR */ +/* Step 4: Create TLB for SRAM - BEFORE CPC enable (original working order) + * This is for e5500/e6500 CPC SRAM only. e500 has its own init_sram_tlb below. */ +#if defined(INITIAL_SRAM_ADDR) && (defined(CORE_E5500) || defined(CORE_E6500)) init_sram_tlb: /* Initial SRAM: TLB 1, Entry 9, Supervisor X/R/W, M, TS=0, IPROT - * CPC SRAM uses cacheable memory-coherent (M) access. - * TLB is created BEFORE l2_setup_sram per old working code. */ + * Original working T2080 code (commit 11f46a51) used MAS2_M. 
*/ set_tlb(1, 9, INITIAL_SRAM_ADDR, INITIAL_SRAM_ADDR, 0, MAS3_SX | MAS3_SW | MAS3_SR, MAS2_M, 0, INITIAL_SRAM_BOOKE_SZ, 1, r3); -#endif - -#ifdef ENABLE_L2_CACHE +#endif /* INITIAL_SRAM_ADDR && (CORE_E5500 || CORE_E6500) */ -#if defined(CORE_E5500) || defined(CORE_E6500) /* --- L2 E5500/E6500 --- */ -#ifdef L2SRAM_ADDR -l2_setup_sram: - /* T2080RM: 8.4.2.2 - CPC initialization - * Configure SRAM control registers, then enable CPC with parity. - * The LAW (DDR_1) provides CoreNet routing; CPC intercepts before DDR. - * SRAM is zeroed later via dcbz through cacheable TLB (MAS2_M). */ +/* Step 5: Enable CPC after TLB is configured */ +#if defined(ENABLE_L2_CACHE) && defined(L2SRAM_ADDR) && (defined(CORE_E5500) || defined(CORE_E6500)) +cpc_enable: /* R1 = CPC base */ LOAD_ADDR32(r1, CPC_BASE) - /* Configure CPC SRAM control registers */ - li r0, 0 - stw r0, CPCSRCR1(r1) /* SRAM high address = 0 */ - /* SRAM low address - use LOAD_ADDR32 on e6500 to avoid sign extension */ - LOAD_ADDR32(r0, L2SRAM_ADDR) - /* Enable SRAM and set size (must match L2SRAM_SIZE) */ - ori r0, r0, (CPCSRCR0_SRAMSZ_1024 | CPCSRCR0_SRAMEN) - stw r0, CPCSRCR0(r1) - /* Enable CPC with parity */ lis r0, (CPCCSR0_CPCE | CPCCSR0_CPCPE)@h mbar @@ -619,11 +652,24 @@ l2_setup_sram: stw r0, CPCCSR0(r1) mbar + /* Verify CPC is enabled by reading back CPCCSR0 */ +cpc_poll_enable: + lwz r2, CPCCSR0(r1) + andis. 
r2, r2, CPCCSR0_CPCE@h /* check CPCE bit */ + beq cpc_poll_enable + isync + /* Disable speculation (Errata A-006593) */ lwz r0, CPCHDBCR0(r1) oris r0, r0, CPCHDBCR0_SPEC_DIS@h stw r0, CPCHDBCR0(r1) -#endif /* L2SRAM_ADDR */ + mbar + isync +#endif /* ENABLE_L2_CACHE && L2SRAM_ADDR */ + +#ifdef ENABLE_L2_CACHE +#if defined(CORE_E5500) || defined(CORE_E6500) /* --- L2 E5500/E6500 --- */ +/* Note: CPC SRAM setup moved above for correct T2080RM sequence */ #if defined(CORE_E6500) /* --- L2 E6500 --- */ l2_setup_cache: @@ -653,6 +699,13 @@ l2_poll_invclear: isync LOAD_ADDR32(r4, (L2CSR0_L2E | L2CSR0_L2PE)) stw r4, L2CSR0(r5) + mbar + + /* Verify L2 is enabled by reading back L2CSR0 */ +l2_poll_enable: + lwz r3, L2CSR0(r5) + andis. r3, r3, L2CSR0_L2E@h /* check bit 31 (L2E) */ + beq l2_poll_enable /* loop until enabled */ isync #elif defined(CORE_E5500) /* --- L2 E5500 --- */ @@ -714,6 +767,17 @@ l2_setup_sram: stw r1, L2SRBAR0(r5) mbar #endif /* L2SRAM_ADDR */ + +#ifdef INITIAL_SRAM_ADDR +init_sram_tlb: + /* Initial SRAM: TLB 1, Entry 9, Supervisor X/R/W, M, TS=0, IPROT + * For e500, L2 SRAM uses cacheable memory-coherent (M) access. + * TLB is created AFTER l2_setup_sram configures L2 as SRAM. */ + set_tlb(1, 9, + INITIAL_SRAM_ADDR, INITIAL_SRAM_ADDR, 0, + MAS3_SX | MAS3_SW | MAS3_SR, MAS2_M, 0, + INITIAL_SRAM_BOOKE_SZ, 1, r3); +#endif /* INITIAL_SRAM_ADDR */ #endif /* CORE_E500 */ #endif /* ENABLE_L2_CACHE */ @@ -776,9 +840,9 @@ cache_sram_init_loop: bdnz cache_sram_init_loop #elif defined(L2SRAM_ADDR) cache_sram_init: - /* Zero CPC SRAM via cache (MAS2_M = cacheable, memory coherent). - * dcbz allocates zeroed cache lines without reading from CPC, - * avoiding ECC/parity issues from uninitialized SRAM. */ + /* Zero CPC SRAM via dcbz. + * With MAS2_M (memory-coherent), dcbz allocates zeroed cache lines. + * This initializes SRAM and avoids ECC/parity issues from uninitialized data. 
*/ LOAD_ADDR32(r3, L2SRAM_ADDR) li r0, 0 LOAD_ADDR32(r2, (L2SRAM_SIZE / CACHE_LINE_SIZE)) From 51136f178466986a9ff312a03253eefb1a44291a Mon Sep 17 00:00:00 2001 From: David Garske Date: Wed, 18 Feb 2026 14:07:53 -0800 Subject: [PATCH 07/11] Progress with DDR (working now on NAII 6xppc2) --- hal/nxp_ppc.h | 14 +++-- hal/nxp_t2080.c | 130 ++++++++++++++++++++++++++++++++++++++++++- hal/nxp_t2080.h | 23 ++++---- hal/nxp_t2080.ld | 4 +- src/boot_ppc_start.S | 4 +- 5 files changed, 155 insertions(+), 20 deletions(-) diff --git a/hal/nxp_ppc.h b/hal/nxp_ppc.h index 6ee8d11de3..2d08afe741 100644 --- a/hal/nxp_ppc.h +++ b/hal/nxp_ppc.h @@ -118,17 +118,17 @@ #define ENABLE_L1_CACHE #define ENABLE_L2_CACHE - /* T2080 CPC SRAM config - 512KB per T2080RM */ - #define L2SRAM_ADDR (0xF8F80000UL) /* CPC as SRAM (512KB) */ - #define L2SRAM_SIZE (512UL * 1024UL) + /* T2080 CPC SRAM config - 1MB for ECC P384 stack requirements */ + #define L2SRAM_ADDR (0xF8F00000UL) /* CPC as SRAM (1MB) */ + #define L2SRAM_SIZE (1024UL * 1024UL) #define INITIAL_SRAM_ADDR L2SRAM_ADDR /* CPC SRAM transactions traverse the CoreNet interconnect, which * requires a LAW to route them. LAW_TRGT_DDR_1 (0x10) is the CPC * target per T2080RM Table 2-2 (Target ID Assignments). 
*/ - #define INITIAL_SRAM_LAW_SZ LAW_SIZE_512KB + #define INITIAL_SRAM_LAW_SZ LAW_SIZE_1MB #define INITIAL_SRAM_LAW_TRGT LAW_TRGT_DDR_1 - #define INITIAL_SRAM_BOOKE_SZ BOOKE_PAGESZ_512K + #define INITIAL_SRAM_BOOKE_SZ BOOKE_PAGESZ_1M #define ENABLE_INTERRUPTS @@ -593,6 +593,10 @@ #endif #define mtspr(rn, v) __asm__ __volatile__("mtspr " WC_STRINGIFY(rn) ",%0" : : "r" (v)) +#define mfspr(rn) ({ \ + unsigned int rval; \ + __asm__ __volatile__("mfspr %0," WC_STRINGIFY(rn) : "=r" (rval)); rval; \ +}) #define mfmsr() ({ \ unsigned int rval; \ diff --git a/hal/nxp_t2080.c b/hal/nxp_t2080.c index caee6ae51b..6428b88a7c 100644 --- a/hal/nxp_t2080.c +++ b/hal/nxp_t2080.c @@ -230,7 +230,8 @@ void hal_ddr_init(void) set_law(4, 0, DDR_ADDRESS, LAW_TRGT_DDR_1, LAW_SIZE_2GB, 0); /* If DDR is already enabled then just return */ - if (get32(DDR_SDRAM_CFG) & DDR_SDRAM_CFG_MEM_EN) { + reg = get32(DDR_SDRAM_CFG); + if (reg & DDR_SDRAM_CFG_MEM_EN) { return; } @@ -369,6 +370,129 @@ static void hal_cpld_init(void) #endif } +#if defined(DEBUG_UART) && defined(ENABLE_DDR) +/* DDR memory test - writes patterns and verifies readback */ +static int hal_ddr_test(void) +{ + volatile uint32_t *ddr = (volatile uint32_t *)DDR_ADDRESS; + uint32_t patterns[] = {0x55555555, 0xAAAAAAAA, 0x12345678, 0xDEADBEEF}; + uint32_t test_offsets[] = {0, 0x100, 0x1000, 0x10000, 0x100000, 0x1000000}; + int i, j; + int errors = 0; + uint32_t reg; + + /* Show DDR controller status */ + reg = get32(DDR_SDRAM_CFG); + wolfBoot_printf("DDR: SDRAM_CFG=0x%x (MEM_EN=%d)\n", reg, + (reg & DDR_SDRAM_CFG_MEM_EN) ? 1 : 0); + reg = get32(DDR_SDRAM_CFG_2); + wolfBoot_printf("DDR: SDRAM_CFG_2=0x%x (D_INIT=%d)\n", reg, + (reg & DDR_SDRAM_CFG_2_D_INIT) ? 
1 : 0); + + /* Show DDR LAW configuration (LAW 4) */ + wolfBoot_printf("DDR LAW4: H=0x%x L=0x%x AR=0x%x\n", + get32(LAWBARH(4)), get32(LAWBARL(4)), get32(LAWAR(4))); + + /* Read DDR TLB entry 12 using tlbre */ + { + uint32_t mas0, mas1, mas2, mas3, mas7; + /* Select TLB1, entry 12 */ + mas0 = (1 << 28) | (12 << 16); /* TLBSEL=1, ESEL=12 */ + mtspr(MAS0, mas0); + __asm__ __volatile__("isync; tlbre; isync"); + mas1 = mfspr(MAS1); + mas2 = mfspr(MAS2); + mas3 = mfspr(MAS3); + mas7 = mfspr(MAS7); + wolfBoot_printf("DDR TLB12: MAS1=0x%x MAS2=0x%x MAS3=0x%x MAS7=0x%x\n", + mas1, mas2, mas3, mas7); + /* Check if TLB entry is valid */ + if (!(mas1 & 0x80000000)) { + wolfBoot_printf("DDR: ERROR - TLB12 not valid!\n"); + return -1; + } + } + + /* Check if DDR is enabled */ + if (!(get32(DDR_SDRAM_CFG) & DDR_SDRAM_CFG_MEM_EN)) { + wolfBoot_printf("DDR: ERROR - Memory not enabled!\n"); + return -1; + } + + /* Check if DDR LAW is enabled */ + reg = get32(LAWAR(4)); + if (!(reg & LAWAR_ENABLE)) { + wolfBoot_printf("DDR: ERROR - LAW4 not enabled!\n"); + return -1; + } + + /* Show DDR chip select configuration */ + wolfBoot_printf("DDR CS0: BNDS=0x%x CFG=0x%x\n", + get32(DDR_CS_BNDS(0)), get32(DDR_CS_CONFIG(0))); + wolfBoot_printf("DDR CS1: BNDS=0x%x CFG=0x%x\n", + get32(DDR_CS_BNDS(1)), get32(DDR_CS_CONFIG(1))); + + /* Show DDR debug status registers */ + wolfBoot_printf("DDR DDRDSR_1=0x%x DDRDSR_2=0x%x\n", + get32(DDR_DDRDSR_1), get32(DDR_DDRDSR_2)); + wolfBoot_printf("DDR DDRCDR_1=0x%x DDRCDR_2=0x%x\n", + get32(DDR_DDRCDR_1), get32(DDR_DDRCDR_2)); + + /* Check for pre-existing DDR errors */ + reg = get32(DDR_ERR_DETECT); + wolfBoot_printf("DDR ERR_DETECT=0x%x\n", reg); + if (reg != 0) { + wolfBoot_printf("DDR: ERROR - Pre-existing DDR errors!\n"); + wolfBoot_printf(" Bit 31 (MME): %d - Multiple errors\n", (reg >> 31) & 1); + wolfBoot_printf(" Bit 7 (APE): %d - Address parity\n", (reg >> 7) & 1); + wolfBoot_printf(" Bit 3 (ACE): %d - Auto calibration\n", (reg >> 3) & 1); + 
wolfBoot_printf(" Bit 2 (CDE): %d - Correctable data\n", (reg >> 2) & 1); + wolfBoot_printf("DDR: Skipping memory test due to errors\n"); + return -1; + } + + wolfBoot_printf("DDR Test: base=0x%x\n", DDR_ADDRESS); + wolfBoot_printf("DDR: Attempting simple read at 0x%x...\n", DDR_ADDRESS); + + /* First just try to read - don't write yet */ + { + volatile uint32_t val = *ddr; + wolfBoot_printf("DDR: Read returned 0x%x\n", val); + } + + for (i = 0; i < (int)(sizeof(test_offsets)/sizeof(test_offsets[0])); i++) { + uint32_t offset = test_offsets[i]; + volatile uint32_t *addr = ddr + (offset / sizeof(uint32_t)); + + for (j = 0; j < (int)(sizeof(patterns)/sizeof(patterns[0])); j++) { + uint32_t pattern = patterns[j]; + uint32_t readback; + + /* Write pattern */ + *addr = pattern; + __asm__ __volatile__("sync" ::: "memory"); + + /* Read back */ + readback = *addr; + + if (readback != pattern) { + wolfBoot_printf(" FAIL: @0x%x wrote 0x%x read 0x%x\n", + (uint32_t)addr, pattern, readback); + errors++; + } + } + } + + if (errors == 0) { + wolfBoot_printf("DDR Test: PASSED\n"); + } else { + wolfBoot_printf("DDR Test: FAILED (%d errors)\n", errors); + } + + return errors; +} +#endif /* DEBUG_UART && ENABLE_DDR */ + void hal_init(void) { #if defined(DEBUG_UART) && defined(ENABLE_CPLD) @@ -401,6 +525,10 @@ void hal_init(void) #ifdef ENABLE_MP hal_mp_init(); #endif + +#if defined(DEBUG_UART) && defined(ENABLE_DDR) + hal_ddr_test(); +#endif } static void hal_flash_unlock_sector(uint32_t sector) diff --git a/hal/nxp_t2080.h b/hal/nxp_t2080.h index 0efab8404b..1b10e9b717 100644 --- a/hal/nxp_t2080.h +++ b/hal/nxp_t2080.h @@ -233,6 +233,7 @@ enum ifc_amask_sizes { #define DDR_TRTP_PS 7500 #define DDR_REF_RATE_PS 7800000 +/* DDR values from working U-Boot on NAII 68PPC2 board */ #define DDR_CS0_BNDS_VAL 0x000000FF #define DDR_CS1_BNDS_VAL 0x010001FF #define DDR_CS2_BNDS_VAL 0x0300033F @@ -243,29 +244,30 @@ enum ifc_amask_sizes { #define DDR_CS3_CONFIG_VAL 0x00040202 #define 
DDR_CS_CONFIG_2_VAL 0x00000000 -#define DDR_TIMING_CFG_0_VAL 0xFF550004 -#define DDR_TIMING_CFG_1_VAL 0xBCB48C56 -#define DDR_TIMING_CFG_2_VAL 0x0040C114 -#define DDR_TIMING_CFG_3_VAL 0x010C1000 +#define DDR_TIMING_CFG_0_VAL 0xFF530004 +#define DDR_TIMING_CFG_1_VAL 0x98906345 +#define DDR_TIMING_CFG_2_VAL 0x0040A114 +#define DDR_TIMING_CFG_3_VAL 0x010A1100 #define DDR_TIMING_CFG_4_VAL 0x00000001 -#define DDR_TIMING_CFG_5_VAL 0x03402400 +#define DDR_TIMING_CFG_5_VAL 0x04402400 #define DDR_SDRAM_MODE_VAL 0x00441C70 #define DDR_SDRAM_MODE_2_VAL 0x00980000 #define DDR_SDRAM_MODE_3_8_VAL 0x00000000 #define DDR_SDRAM_MD_CNTL_VAL 0x00000000 -#define DDR_SDRAM_CFG_VAL 0xE7044000 -#define DDR_SDRAM_CFG_2_VAL 0x00401050 +#define DDR_SDRAM_CFG_VAL 0xE7040000 +#define DDR_SDRAM_CFG_2_VAL 0x00401000 #define DDR_SDRAM_INTERVAL_VAL 0x0C300100 #define DDR_DATA_INIT_VAL 0xDEADBEEF #define DDR_SDRAM_CLK_CNTL_VAL 0x02400000 #define DDR_ZQ_CNTL_VAL 0x89080600 -#define DDR_WRLVL_CNTL_VAL 0x8675F608 -#define DDR_WRLVL_CNTL_2_VAL 0x080A0A0C -#define DDR_WRLVL_CNTL_3_VAL 0x0C0E0E0D +/* Write leveling - CRITICAL: board-specific values from U-Boot */ +#define DDR_WRLVL_CNTL_VAL 0x8675F604 +#define DDR_WRLVL_CNTL_2_VAL 0x05060607 +#define DDR_WRLVL_CNTL_3_VAL 0x080A0A0B #define DDR_SDRAM_RCW_1_VAL 0x00000000 #define DDR_SDRAM_RCW_2_VAL 0x00000000 @@ -317,6 +319,7 @@ enum ifc_amask_sizes { #define DDR_DDRCDR_2 ((volatile uint32_t*)(DDR_BASE + 0xB2C)) /* DDR Control Driver Register 2 */ #define DDR_DDRDSR_1 ((volatile uint32_t*)(DDR_BASE + 0xB20)) /* DDR Debug Status Register 1 */ #define DDR_DDRDSR_2 ((volatile uint32_t*)(DDR_BASE + 0xB24)) /* DDR Debug Status Register 2 */ +#define DDR_ERR_DETECT ((volatile uint32_t*)(DDR_BASE + 0xE40)) /* Memory error detect */ #define DDR_ERR_DISABLE ((volatile uint32_t*)(DDR_BASE + 0xE44)) /* Memory error disable */ #define DDR_ERR_INT_EN ((volatile uint32_t*)(DDR_BASE + 0xE48)) /* Memory error interrupt enable */ #define DDR_ERR_SBE ((volatile 
uint32_t*)(DDR_BASE + 0xE58)) /* Single-Bit ECC memory error management */ diff --git a/hal/nxp_t2080.ld b/hal/nxp_t2080.ld index 2294421624..ba723fc800 100644 --- a/hal/nxp_t2080.ld +++ b/hal/nxp_t2080.ld @@ -13,9 +13,9 @@ MEMORY { FLASH (rx) : ORIGIN = @WOLFBOOT_ORIGIN@, LENGTH = @BOOTLOADER_PARTITION_SIZE@ - /* CPC as SRAM - 512KB (T2080 supports up to 2MB, using 512KB) + /* CPC as SRAM - 1MB (T2080 supports up to 2MB, using 1MB for P384 stack) * Layout: .ramcode at bottom, stack grows down from top */ - RAM (rwx) : ORIGIN = 0xF8F80000, LENGTH = 0x80000 + RAM (rwx) : ORIGIN = 0xF8F00000, LENGTH = 0x100000 /* DDR - 2GB */ DRAM (rwx) : ORIGIN = 0x00000000, LENGTH = 0x7FFFFFFF diff --git a/src/boot_ppc_start.S b/src/boot_ppc_start.S index e862062459..ae884a1f45 100644 --- a/src/boot_ppc_start.S +++ b/src/boot_ppc_start.S @@ -596,8 +596,8 @@ cpc_poll_invalidate: stw r0, CPCSRCR1(r1) /* SRAM high address = 0 */ /* SRAM low address - use LOAD_ADDR32 on e6500 to avoid sign extension */ LOAD_ADDR32(r0, L2SRAM_ADDR) - /* Enable SRAM and set size (must match L2SRAM_SIZE = 512KB) */ - ori r0, r0, (CPCSRCR0_SRAMSZ_512 | CPCSRCR0_SRAMEN) + /* Enable SRAM and set size (must match L2SRAM_SIZE = 1MB for P384) */ + ori r0, r0, (CPCSRCR0_SRAMSZ_1024 | CPCSRCR0_SRAMEN) stw r0, CPCSRCR0(r1) mbar isync From c9c3b954a0094217b2ccb9e1e453ef4b374ca5fd Mon Sep 17 00:00:00 2001 From: David Garske Date: Thu, 19 Feb 2026 11:46:50 -0800 Subject: [PATCH 08/11] Test app is booting on the NAII T2080 module --- config/examples/nxp-t2080.config | 12 +--- hal/nxp_ppc.h | 7 ++ hal/nxp_t2080.c | 108 +++++++++++++++++++++++++++---- hal/nxp_t2080.h | 3 + 4 files changed, 108 insertions(+), 22 deletions(-) diff --git a/config/examples/nxp-t2080.config b/config/examples/nxp-t2080.config index 2e82d00412..9393efef31 100644 --- a/config/examples/nxp-t2080.config +++ b/config/examples/nxp-t2080.config @@ -23,45 +23,39 @@ ALLOW_DOWNGRADE?=0 NVM_FLASH_WRITEONCE?=0 WOLFBOOT_VERSION?=0 NO_MPU?=0 -SPMATH?=0 
-SPMATHALL?=1 +SPMATH?=1 +SPMATHALL?=0 RAM_CODE?=1 DUALBANK_SWAP?=0 WOLFTPM?=0 +OPTIMIZATION_LEVEL?=1 # NOR Base Address ARCH_FLASH_OFFSET?=0xEFFE0000 -# NAII 68PPC2: ARCH_FLASH_OFFSET?=0xE8000000 # Flash Sector Size WOLFBOOT_SECTOR_SIZE?=0x10000 # wolfBoot start address WOLFBOOT_ORIGIN?=0xEFFE0000 -# NAII 68PPC2: WOLFBOOT_ORIGIN?=0xEFF40000 # wolfBoot partition size (custom) BOOTLOADER_PARTITION_SIZE=0x20000 # Application Partition Size WOLFBOOT_PARTITION_SIZE?=0x20000 -# NAII 68PPC2: WOLFBOOT_PARTITION_SIZE?=0xA00000 # Location in Flash for Application Partition WOLFBOOT_PARTITION_BOOT_ADDRESS?=0xEFFC0000 -# NAII 68PPC2: WOLFBOOT_PARTITION_BOOT_ADDRESS?=0xE8080000 # Load Partition to RAM Address WOLFBOOT_LOAD_ADDRESS?=0x19000 # Location in Flash for Update Partition WOLFBOOT_PARTITION_UPDATE_ADDRESS?=0xEFFA0000 -# NAII 68PPC2: WOLFBOOT_PARTITION_UPDATE_ADDRESS?=0xE8A80000 # Location of temporary sector used during updates WOLFBOOT_PARTITION_SWAP_ADDRESS?=0xEFF90000 -# NAII 68PPC2: WOLFBOOT_PARTITION_SWAP_ADDRESS?=0xE8060000 # DTS (Device Tree) WOLFBOOT_DTS_BOOT_ADDRESS?=0xE8040000 WOLFBOOT_DTS_UPDATE_ADDRESS?=0xE8050000 # DTS Load to RAM Address WOLFBOOT_LOAD_DTS_ADDRESS?=0x200000 -# NAII 68PPC2: WOLFBOOT_LOAD_DTS_ADDRESS?=0x40000 \ No newline at end of file diff --git a/hal/nxp_ppc.h b/hal/nxp_ppc.h index 2d08afe741..be9b330d09 100644 --- a/hal/nxp_ppc.h +++ b/hal/nxp_ppc.h @@ -137,6 +137,13 @@ #define DDR_SIZE (8192ULL * 1024ULL * 1024ULL) #endif + /* DDR stack configuration - relocate from CPC SRAM after DDR init + * Stack is at top of first 32MB of DDR, with 64KB reserved for stack + * Stack grows downward from DDR_STACK_TOP */ + #define DDR_STACK_SIZE (64 * 1024) /* 64KB stack in DDR */ + #define DDR_STACK_TOP 0x02000000UL /* Top of first 32MB */ + #define DDR_STACK_BASE (DDR_STACK_TOP - DDR_STACK_SIZE) + #define FLASH_BASE_ADDR 0xE8000000UL #define FLASH_BASE_PHYS_HIGH 0x0ULL #define FLASH_LAW_SIZE LAW_SIZE_128MB diff --git a/hal/nxp_t2080.c 
b/hal/nxp_t2080.c index 6428b88a7c..c13e7c13b1 100644 --- a/hal/nxp_t2080.c +++ b/hal/nxp_t2080.c @@ -19,6 +19,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA */ #include +#include #include "target.h" #include "printf.h" #include "image.h" /* for RAMFUNCTION */ @@ -32,7 +33,9 @@ #define ENABLE_BUS_CLK_CALC #ifndef BUILD_LOADER_STAGE1 - #define ENABLE_MP /* multi-core support */ + /* TODO: Fix e6500 MP initialization - secondary cores not responding. + * Disable MP for now to focus on getting basic boot working. */ + /* #define ENABLE_MP */ /* multi-core support */ #endif /* Forward declarations */ @@ -370,6 +373,64 @@ static void hal_cpld_init(void) #endif } +#ifdef ENABLE_DDR +/* Relocate stack from CPC SRAM to DDR for more stack space. + * Call this after DDR is initialized and verified working. + * This allows signature verification (ECC P384) which needs ~20-30KB stack. */ +static void hal_relocate_stack_to_ddr(void) +{ + uint32_t new_sp = DDR_STACK_TOP - 64; /* 64-byte alignment, room for frame */ + + /* Zero the DDR stack area for clean operation */ + memset((void*)DDR_STACK_BASE, 0, DDR_STACK_SIZE); + + /* Switch stack pointer from CPC SRAM to DDR. + * r1 is the stack pointer in PowerPC ABI. */ + __asm__ __volatile__( + "mr 1, %0\n" /* Move new stack address to r1 */ + "sync\n" + : + : "r" (new_sp) + : "memory" + ); + +#ifdef DEBUG_UART + wolfBoot_printf("Stack relocated to DDR (SP=0x%x)\n", new_sp); +#endif +} + +/* Release CPC SRAM back to L2 cache mode after stack is relocated to DDR. + * This gives us the full 2MB CPC as instruction/data cache for better performance. */ +static void hal_reconfigure_cpc_as_cache(void) +{ + volatile uint32_t *cpc_csr0 = (volatile uint32_t *)(CPC_BASE + CPCCSR0); + volatile uint32_t *cpc_srcr0 = (volatile uint32_t *)(CPC_BASE + CPCSRCR0); + uint32_t reg; + + /* Step 1: Flush the CPC to ensure no stale SRAM data. + * IMPORTANT: Read-modify-write to preserve CPCE/CPCPE enable bits! 
*/ + reg = *cpc_csr0; + reg |= CPCCSR0_CPCFL; + *cpc_csr0 = reg; + __asm__ __volatile__("sync; isync" ::: "memory"); + + /* Step 2: Poll until flush completes (CPCFL clears) */ + do { + reg = *cpc_csr0; + } while (reg & CPCCSR0_CPCFL); + + /* Step 3: Disable SRAM mode - release ways back to cache */ + *cpc_srcr0 = 0; /* Clear SRAMEN and SRAMSZ */ + __asm__ __volatile__("sync; isync" ::: "memory"); + + /* CPC remains enabled (CPCE/CPCPE preserved), now with all ways as cache */ + +#ifdef DEBUG_UART + wolfBoot_printf("CPC: Released SRAM, full L2 cache enabled\n"); +#endif +} +#endif /* ENABLE_DDR */ + #if defined(DEBUG_UART) && defined(ENABLE_DDR) /* DDR memory test - writes patterns and verifies readback */ static int hal_ddr_test(void) @@ -381,6 +442,7 @@ static int hal_ddr_test(void) int errors = 0; uint32_t reg; +#ifdef DEBUG_DDR /* Show DDR controller status */ reg = get32(DDR_SDRAM_CFG); wolfBoot_printf("DDR: SDRAM_CFG=0x%x (MEM_EN=%d)\n", reg, @@ -412,6 +474,7 @@ static int hal_ddr_test(void) return -1; } } +#endif /* DEBUG_DDR */ /* Check if DDR is enabled */ if (!(get32(DDR_SDRAM_CFG) & DDR_SDRAM_CFG_MEM_EN)) { @@ -426,6 +489,7 @@ static int hal_ddr_test(void) return -1; } +#ifdef DEBUG_DDR /* Show DDR chip select configuration */ wolfBoot_printf("DDR CS0: BNDS=0x%x CFG=0x%x\n", get32(DDR_CS_BNDS(0)), get32(DDR_CS_CONFIG(0))); @@ -437,28 +501,24 @@ static int hal_ddr_test(void) get32(DDR_DDRDSR_1), get32(DDR_DDRDSR_2)); wolfBoot_printf("DDR DDRCDR_1=0x%x DDRCDR_2=0x%x\n", get32(DDR_DDRCDR_1), get32(DDR_DDRCDR_2)); +#endif /* DEBUG_DDR */ /* Check for pre-existing DDR errors */ reg = get32(DDR_ERR_DETECT); - wolfBoot_printf("DDR ERR_DETECT=0x%x\n", reg); if (reg != 0) { - wolfBoot_printf("DDR: ERROR - Pre-existing DDR errors!\n"); + wolfBoot_printf("DDR: ERR_DETECT=0x%x (errors present)\n", reg); +#ifdef DEBUG_DDR wolfBoot_printf(" Bit 31 (MME): %d - Multiple errors\n", (reg >> 31) & 1); wolfBoot_printf(" Bit 7 (APE): %d - Address parity\n", (reg >> 7) & 1); 
wolfBoot_printf(" Bit 3 (ACE): %d - Auto calibration\n", (reg >> 3) & 1); wolfBoot_printf(" Bit 2 (CDE): %d - Correctable data\n", (reg >> 2) & 1); - wolfBoot_printf("DDR: Skipping memory test due to errors\n"); +#endif return -1; } +#ifdef DEBUG_DDR wolfBoot_printf("DDR Test: base=0x%x\n", DDR_ADDRESS); - wolfBoot_printf("DDR: Attempting simple read at 0x%x...\n", DDR_ADDRESS); - - /* First just try to read - don't write yet */ - { - volatile uint32_t val = *ddr; - wolfBoot_printf("DDR: Read returned 0x%x\n", val); - } +#endif for (i = 0; i < (int)(sizeof(test_offsets)/sizeof(test_offsets[0])); i++) { uint32_t offset = test_offsets[i]; @@ -476,7 +536,7 @@ static int hal_ddr_test(void) readback = *addr; if (readback != pattern) { - wolfBoot_printf(" FAIL: @0x%x wrote 0x%x read 0x%x\n", + wolfBoot_printf("DDR FAIL: @0x%x wrote 0x%x read 0x%x\n", (uint32_t)addr, pattern, readback); errors++; } @@ -507,6 +567,9 @@ void hal_init(void) #ifdef DEBUG_UART uart_init(); uart_write("wolfBoot Init\n", 14); +#ifndef WOLFBOOT_REPRODUCIBLE_BUILD + wolfBoot_printf("Build: %s %s\n", __DATE__, __TIME__); +#endif #endif hal_flash_init(); @@ -526,8 +589,27 @@ void hal_init(void) hal_mp_init(); #endif -#if defined(DEBUG_UART) && defined(ENABLE_DDR) +#ifdef ENABLE_DDR + /* Test DDR (when DEBUG_UART enabled) */ +#ifdef DEBUG_UART hal_ddr_test(); +#endif + /* TODO: Implement proper assembly-based stack relocation to DDR. + * The current C-based approach corrupts return addresses because: + * 1. hal_init's return address is saved on CPC SRAM stack + * 2. Stack switch changes SP to DDR (zeroed area) + * 3. CPC release makes old stack contents invalid + * 4. Function returns read garbage addresses + * + * For now, keep using CPC SRAM stack (1MB should be enough for P384). + * Stack relocation needs to be done in assembly with proper LR handling. 
+ */ +#if 0 /* Disabled until proper assembly implementation */ + { + hal_relocate_stack_to_ddr(); + hal_reconfigure_cpc_as_cache(); + } +#endif #endif } diff --git a/hal/nxp_t2080.h b/hal/nxp_t2080.h index 1b10e9b717..ee219c34ad 100644 --- a/hal/nxp_t2080.h +++ b/hal/nxp_t2080.h @@ -27,6 +27,9 @@ #include "nxp_ppc.h" +/* Uncomment to enable verbose DDR debugging output */ +/* #define DEBUG_DDR */ + /* T2080 System Clock */ #define SYS_CLK (600000000) /* 100MHz PLL with 6:1 = 600 MHz */ From 61638a448103a83212a331169d768990e32b01ec Mon Sep 17 00:00:00 2001 From: David Garske Date: Thu, 19 Feb 2026 13:28:37 -0800 Subject: [PATCH 09/11] Relocation to DDR and enabling cache --- docs/Targets.md | 20 ++++-- hal/nxp_ppc.h | 5 ++ hal/nxp_t2080.c | 144 +++++++++++++++++++++++++++---------------- src/boot_ppc.c | 18 ++++++ src/boot_ppc_start.S | 24 ++++++++ 5 files changed, 154 insertions(+), 57 deletions(-) diff --git a/docs/Targets.md b/docs/Targets.md index 356902ff1a..f51070d2bb 100644 --- a/docs/Targets.md +++ b/docs/Targets.md @@ -3129,12 +3129,22 @@ Example Boot Debug Output: ``` wolfBoot Init -Part: Active 0, Address E8080000 -Image size 1028 +Build: Feb 19 2026 13:24:56 +DDR Test: PASSED +Ramcode: copied 2852 bytes to DDR, TLB9 remapped +CPC: Released SRAM, full 2MB L2 cache enabled +Flash: caching enabled (L1+L2+CPC) +Versions: Boot 1, Update 0 +Trying Boot partition at 0xEFFC0000 +Boot partition: 0xEFFC0000 (sz 3164, ver 0x1, type 0x601) +Checking integrity...done +Verifying signature...done +Successfully selected image in part: 0 Firmware Valid -Loading 1028 bytes to RAM at 19000 -Failed parsing DTB to load. -Booting at 19000 +Copying image from 0xEFFC0200 to RAM at 0x19000 (3164 bytes) +Failed parsing DTB to load +Booting at 0x19000 +FDT: Invalid header! 
-1 Test App 0x00000001 diff --git a/hal/nxp_ppc.h b/hal/nxp_ppc.h index be9b330d09..ba19440be7 100644 --- a/hal/nxp_ppc.h +++ b/hal/nxp_ppc.h @@ -144,6 +144,11 @@ #define DDR_STACK_TOP 0x02000000UL /* Top of first 32MB */ #define DDR_STACK_BASE (DDR_STACK_TOP - DDR_STACK_SIZE) + /* DDR address where .ramcode is copied before CPC SRAM is released. + * TLB9 is remapped: VA 0xF8F00000 -> PA DDR_RAMCODE_ADDR so that + * RAMFUNCTION code continues to work after CPC becomes L2 cache. */ + #define DDR_RAMCODE_ADDR 0x03000000UL /* 48MB into DDR */ + #define FLASH_BASE_ADDR 0xE8000000UL #define FLASH_BASE_PHYS_HIGH 0x0ULL #define FLASH_LAW_SIZE LAW_SIZE_128MB diff --git a/hal/nxp_t2080.c b/hal/nxp_t2080.c index c13e7c13b1..a442877e23 100644 --- a/hal/nxp_t2080.c +++ b/hal/nxp_t2080.c @@ -19,7 +19,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA */ #include <stdint.h> -#include <string.h> #include "target.h" #include "printf.h" #include "image.h" /* for RAMFUNCTION */ @@ -374,59 +373,105 @@ static void hal_cpld_init(void) } #ifdef ENABLE_DDR -/* Relocate stack from CPC SRAM to DDR for more stack space. - * Call this after DDR is initialized and verified working. - * This allows signature verification (ECC P384) which needs ~20-30KB stack. */ -static void hal_relocate_stack_to_ddr(void) +/* Release CPC SRAM back to L2 cache mode. + * Call after stack is relocated to DDR (done in boot_entry_C). + * This gives us the full 2MB CPC as L3 cache for better performance. + * + * Before releasing CPC SRAM, .ramcode (RAMFUNCTION) is copied to DDR + * and TLB9 is remapped: VA 0xF8F00000 -> PA DDR_RAMCODE_ADDR so that + * RAMFUNCTION code (memcpy, wolfBoot_start, etc.) continues to work.
*/ +static void hal_reconfigure_cpc_as_cache(void) { - uint32_t new_sp = DDR_STACK_TOP - 64; /* 64-byte alignment, room for frame */ + volatile uint32_t *cpc_csr0 = (volatile uint32_t *)(CPC_BASE + CPCCSR0); + volatile uint32_t *cpc_srcr0 = (volatile uint32_t *)(CPC_BASE + CPCSRCR0); + uint32_t reg; - /* Zero the DDR stack area for clean operation */ - memset((void*)DDR_STACK_BASE, 0, DDR_STACK_SIZE); + /* Linker symbols for .ramcode section boundaries */ + extern unsigned int _start_ramcode; + extern unsigned int _end_ramcode; + uint32_t ramcode_size = (uint32_t)&_end_ramcode - (uint32_t)&_start_ramcode; + + /* Step 1: Copy .ramcode from CPC SRAM to DDR. + * Must use volatile loop — memcpy itself is in .ramcode! */ + if (ramcode_size > 0) { + volatile const uint32_t *src = (volatile const uint32_t *)&_start_ramcode; + volatile uint32_t *dst = (volatile uint32_t *)DDR_RAMCODE_ADDR; + volatile uint32_t *end = (volatile uint32_t *)(DDR_RAMCODE_ADDR + + ramcode_size); + while (dst < end) { + *dst++ = *src++; + } + + /* Flush D-cache and invalidate I-cache for the DDR copy */ + flush_cache(DDR_RAMCODE_ADDR, ramcode_size); - /* Switch stack pointer from CPC SRAM to DDR. - * r1 is the stack pointer in PowerPC ABI. */ - __asm__ __volatile__( - "mr 1, %0\n" /* Move new stack address to r1 */ - "sync\n" - : - : "r" (new_sp) - : "memory" - ); + /* Step 2: Remap TLB9: same VA (0xF8F00000) -> DDR physical address. + * All .ramcode references use VA 0xF8F00000, so this makes them + * transparently access the DDR copy instead of CPC SRAM. 
*/ + set_tlb(1, 9, + L2SRAM_ADDR, DDR_RAMCODE_ADDR, 0, + MAS3_SX | MAS3_SW | MAS3_SR, MAS2_M, 0, + INITIAL_SRAM_BOOKE_SZ, 1); + + /* Ensure TLB update and I-cache pick up new mapping */ + invalidate_icache(); + } #ifdef DEBUG_UART - wolfBoot_printf("Stack relocated to DDR (SP=0x%x)\n", new_sp); + wolfBoot_printf("Ramcode: copied %d bytes to DDR, TLB9 remapped\n", + ramcode_size); #endif -} - -/* Release CPC SRAM back to L2 cache mode after stack is relocated to DDR. - * This gives us the full 2MB CPC as instruction/data cache for better performance. */ -static void hal_reconfigure_cpc_as_cache(void) -{ - volatile uint32_t *cpc_csr0 = (volatile uint32_t *)(CPC_BASE + CPCCSR0); - volatile uint32_t *cpc_srcr0 = (volatile uint32_t *)(CPC_BASE + CPCSRCR0); - uint32_t reg; - /* Step 1: Flush the CPC to ensure no stale SRAM data. - * IMPORTANT: Read-modify-write to preserve CPCE/CPCPE enable bits! */ + /* Step 3: Flush the CPC to push any dirty SRAM data out. + * Read-modify-write to preserve CPCE/CPCPE enable bits. */ reg = *cpc_csr0; reg |= CPCCSR0_CPCFL; *cpc_csr0 = reg; __asm__ __volatile__("sync; isync" ::: "memory"); - /* Step 2: Poll until flush completes (CPCFL clears) */ - do { - reg = *cpc_csr0; - } while (reg & CPCCSR0_CPCFL); + /* Step 4: Poll until flush completes (CPCFL clears) */ + while (*cpc_csr0 & CPCCSR0_CPCFL); - /* Step 3: Disable SRAM mode - release ways back to cache */ - *cpc_srcr0 = 0; /* Clear SRAMEN and SRAMSZ */ + /* Step 5: Disable SRAM mode - release all ways back to cache */ + *cpc_srcr0 = 0; __asm__ __volatile__("sync; isync" ::: "memory"); - /* CPC remains enabled (CPCE/CPCPE preserved), now with all ways as cache */ + /* Step 6: Disable CPC SRAM LAW (no longer needed — TLB9 now routes + * to DDR via LAW4, not CPC SRAM via LAW2). + * Keep TLB9 — it's remapped to DDR and still in use. 
*/ + set32(LAWAR(2), 0); + + /* Step 7: Flash invalidate CPC to start fresh as cache */ + reg = *cpc_csr0; + reg |= CPCCSR0_CPCFI; + *cpc_csr0 = reg; + __asm__ __volatile__("sync; isync" ::: "memory"); + while (*cpc_csr0 & CPCCSR0_CPCFI); + + /* CPC remains enabled (CPCE/CPCPE preserved), now all 2MB as cache */ #ifdef DEBUG_UART - wolfBoot_printf("CPC: Released SRAM, full L2 cache enabled\n"); + wolfBoot_printf("CPC: Released SRAM, full 2MB L2 cache enabled\n"); +#endif +} + +/* Make flash TLB cacheable for XIP code performance. + * Changes TLB Entry 2 (flash) from MAS2_I|MAS2_G to MAS2_M. + * This enables L1 I-cache + L2 + CPC to cache flash instructions. */ +static void hal_flash_enable_caching(void) +{ + /* Rewrite flash TLB entry with cacheable attributes. + * MAS2_M = memory coherent, enables caching */ + set_tlb(1, 2, + FLASH_BASE_ADDR, FLASH_BASE_ADDR, FLASH_BASE_PHYS_HIGH, + MAS3_SX | MAS3_SW | MAS3_SR, MAS2_M, 0, + FLASH_TLB_PAGESZ, 1); + + /* Invalidate L1 I-cache so new TLB attributes take effect */ + invalidate_icache(); + +#ifdef DEBUG_UART + wolfBoot_printf("Flash: caching enabled (L1+L2+CPC)\n"); #endif } #endif /* ENABLE_DDR */ @@ -594,22 +639,17 @@ void hal_init(void) #ifdef DEBUG_UART hal_ddr_test(); #endif - /* TODO: Implement proper assembly-based stack relocation to DDR. - * The current C-based approach corrupts return addresses because: - * 1. hal_init's return address is saved on CPC SRAM stack - * 2. Stack switch changes SP to DDR (zeroed area) - * 3. CPC release makes old stack contents invalid - * 4. Function returns read garbage addresses + + /* Stack is already in DDR (relocated in boot_entry_C via + * ddr_call_with_stack trampoline before main() was called). * - * For now, keep using CPC SRAM stack (1MB should be enough for P384). - * Stack relocation needs to be done in assembly with proper LR handling. 
- */ -#if 0 /* Disabled until proper assembly implementation */ - { - hal_relocate_stack_to_ddr(); - hal_reconfigure_cpc_as_cache(); - } -#endif + * Now release CPC SRAM back to L2 cache and enable flash caching. + * This dramatically improves ECC signature verification performance: + * - CPC (2MB) becomes L3 cache for all memory accesses + * - Flash code is cached by L1 I-cache + L2 + CPC + * - Stack/data in DDR is cached by L1 D-cache + L2 + CPC */ + hal_reconfigure_cpc_as_cache(); + hal_flash_enable_caching(); #endif } diff --git a/src/boot_ppc.c b/src/boot_ppc.c index 810421e1c1..fd8e846c8c 100644 --- a/src/boot_ppc.c +++ b/src/boot_ppc.c @@ -179,7 +179,25 @@ void boot_entry_C(void) } /* Run wolfBoot! */ +#ifdef ENABLE_DDR + /* DDR is initialized, .data and .bss are set up. + * Switch stack from CPC SRAM to DDR for: + * 1. Better performance (DDR stack is cacheable by L1/L2/CPC) + * 2. More stack space (64KB vs shared CPC SRAM) + * Uses assembly trampoline since we can't return after stack switch. + * The CPC SRAM will be released back to L2 cache in hal_init(). */ + { + extern void ddr_call_with_stack(uint32_t func, uint32_t sp); + /* Zero DDR stack area using volatile to prevent memset transform */ + volatile uint32_t *p = (volatile uint32_t *)DDR_STACK_BASE; + volatile uint32_t *e = (volatile uint32_t *)DDR_STACK_TOP; + while (p < e) { *p++ = 0; } + ddr_call_with_stack((uint32_t)main, DDR_STACK_TOP - 64); + /* Does not return */ + } +#else main(); +#endif } #ifndef BUILD_LOADER_STAGE1 diff --git a/src/boot_ppc_start.S b/src/boot_ppc_start.S index ae884a1f45..b84edcf710 100644 --- a/src/boot_ppc_start.S +++ b/src/boot_ppc_start.S @@ -1001,6 +1001,30 @@ dcache_disable: blr #endif +/* void ddr_call_with_stack(uint32_t func_ptr, uint32_t new_sp) + * Switches stack pointer to DDR and calls the given function. + * Used by boot_entry_C to transition from CPC SRAM stack to DDR stack + * before calling main(). Does not return. 
+ * r3 = function pointer to call + * r4 = new stack pointer (top of DDR stack area) */ +.global ddr_call_with_stack +ddr_call_with_stack: + mr r1, r4 /* Set stack pointer to DDR */ + /* Create minimal PPC ABI stack frame with terminated back chain */ + li r0, 0 + stwu r0, -4(r1) /* Terminate back chain */ + stwu r0, -4(r1) + stwu r1, -8(r1) /* Save back chain and move SP */ + lis r0, RESET_VECTOR@h + ori r0, r0, RESET_VECTOR@l + stwu r1, -8(r1) /* Save back chain and move SP */ + stw r0, +12(r1) /* Save return addr (underflow vector) */ + /* Call the function */ + mtctr r3 + bctrl + /* Should never reach here */ +1: b 1b + #ifdef USE_GOT /* function to relocate code, handling cache flushing and continue to From 610af129c9f4201b202a3d4bf5c14a7c2dda00cc Mon Sep 17 00:00:00 2001 From: David Garske Date: Thu, 19 Feb 2026 15:16:44 -0800 Subject: [PATCH 10/11] Progress with multi-core --- hal/nxp_t2080.c | 191 +++++++++++++++++++++++++++++++++++----------- src/boot_ppc_mp.S | 13 +++- 2 files changed, 159 insertions(+), 45 deletions(-) diff --git a/hal/nxp_t2080.c b/hal/nxp_t2080.c index a442877e23..d1fd27fb23 100644 --- a/hal/nxp_t2080.c +++ b/hal/nxp_t2080.c @@ -32,13 +32,11 @@ #define ENABLE_BUS_CLK_CALC #ifndef BUILD_LOADER_STAGE1 - /* TODO: Fix e6500 MP initialization - secondary cores not responding. - * Disable MP for now to focus on getting basic boot working. */ - /* #define ENABLE_MP */ /* multi-core support */ + #define ENABLE_MP /* multi-core support */ #endif /* Forward declarations */ -static void hal_flash_unlock_sector(uint32_t sector); +static void RAMFUNCTION hal_flash_unlock_sector(uint32_t sector); #ifdef ENABLE_MP static void hal_mp_init(void); #endif @@ -202,24 +200,32 @@ static int hal_flash_getid(void) static void hal_flash_init(void) { #ifdef ENABLE_IFC + uint32_t cspr; + /* IFC CS0 - NOR Flash - * Do NOT reprogram IFC CS0 (CSPR, AMASK, CSOR, FTIM) while executing - * from flash (XIP) with cache-inhibited TLB (MAS2_I|MAS2_G). 
The boot - * ROM already configured CS0 correctly. Reprogramming CSPR while XIP - * can cause instruction fetch failures because there is no cache to - * serve fetches during the chip-select decode transition. - * - * U-Boot avoids this by using MAS2_W|MAS2_G (write-through, cached) - * during XIP, only switching to MAS2_I|MAS2_G after relocating to RAM. + * Do NOT reprogram IFC CS0 base address, port size, AMASK, CSOR, or + * FTIM while executing from flash (XIP). The boot ROM already + * configured CS0 correctly. * - * The LAW is also already set in boot_ppc_start.S:flash_law. - */ + * However, the boot ROM may set IFC_CSPR_WP (write-protect), which + * blocks all write cycles to the flash. This prevents AMD command + * sequences (erase/program) from reaching the chips. Clearing just + * the WP bit is safe during XIP — it doesn't change chip-select + * decode, only enables write forwarding. */ + cspr = get32(IFC_CSPR(0)); +#ifdef DEBUG_UART + wolfBoot_printf("IFC CSPR0: 0x%x%s\n", cspr, + (cspr & IFC_CSPR_WP) ? " (WP set)" : ""); +#endif + if (cspr & IFC_CSPR_WP) { + set32(IFC_CSPR(0), cspr & ~IFC_CSPR_WP); + } /* Note: hal_flash_getid() is disabled because AMD Autoselect mode * affects the entire flash bank. Since wolfBoot runs XIP from the same * bank (CS0), entering Autoselect mode crashes instruction fetch. - * Flash write/erase operations will need RAMFUNCTION support. - * TODO: Implement RAMFUNCTION for flash operations on T2080. */ + * Flash write/erase use RAMFUNCTION to execute from DDR during + * flash command mode (after .ramcode relocation in hal_init). 
*/ #endif /* ENABLE_IFC */ } @@ -630,10 +636,6 @@ void hal_init(void) #endif #endif /* ENABLE_CPLD */ -#ifdef ENABLE_MP - hal_mp_init(); -#endif - #ifdef ENABLE_DDR /* Test DDR (when DEBUG_UART enabled) */ #ifdef DEBUG_UART @@ -651,17 +653,78 @@ void hal_init(void) hal_reconfigure_cpc_as_cache(); hal_flash_enable_caching(); #endif + +#ifdef ENABLE_MP + /* Start secondary cores AFTER CPC release and flash caching. + * Secondary cores' L2 flash-invalidate on the shared cluster L2 + * must not disrupt the CPC SRAM→cache transition. Starting them + * after ensures the cache hierarchy is fully stable. */ + hal_mp_init(); +#endif +} + +/* RAM-resident microsecond delay using inline timebase reads. + * Cannot call wait_ticks() (in flash .text) from RAMFUNCTION code + * while flash is in command mode — instruction fetch would return garbage. */ +static void RAMFUNCTION ram_udelay(uint32_t delay_us) +{ + uint32_t tbl_start, tbl_now; + uint32_t ticks = delay_us * DELAY_US; + __asm__ __volatile__("mfspr %0,268" : "=r"(tbl_start)); + do { + __asm__ __volatile__("mfspr %0,268" : "=r"(tbl_now)); + } while ((tbl_now - tbl_start) < ticks); } -static void hal_flash_unlock_sector(uint32_t sector) +/* Switch flash TLB to cache-inhibited for direct flash chip access. + * AMD flash commands require writes to reach the chip immediately and + * status reads to come directly from the chip. With MAS2_M (cacheable), + * writes are cached and never reach the flash, reads return stale data. + * Uses direct SPR manipulation to avoid calling .text functions. 
*/ +static void RAMFUNCTION hal_flash_cache_disable(void) +{ + uint32_t mas2; + /* Select TLB1, entry 2 (flash) */ + mtspr(MAS0, BOOKE_MAS0(1, 2, 0)); + __asm__ __volatile__("isync; tlbre; isync"); + /* Change WIMGE from M to I|G */ + mas2 = mfspr(MAS2); + mas2 &= ~0x1F; /* clear WIMGE bits */ + mas2 |= (MAS2_I | MAS2_G); + mtspr(MAS2, mas2); + __asm__ __volatile__("isync; msync; tlbwe; isync"); +} + +/* Restore flash TLB to cacheable mode after flash operation. + * Flash is back in read-array mode, safe to cache again. */ +static void RAMFUNCTION hal_flash_cache_enable(void) +{ + uint32_t mas2; + /* Select TLB1, entry 2 (flash) */ + mtspr(MAS0, BOOKE_MAS0(1, 2, 0)); + __asm__ __volatile__("isync; tlbre; isync"); + /* Change WIMGE from I|G to M (cacheable) */ + mas2 = mfspr(MAS2); + mas2 &= ~0x1F; + mas2 |= MAS2_M; + mtspr(MAS2, mas2); + __asm__ __volatile__("isync; msync; tlbwe; isync"); + /* Invalidate D-cache and I-cache — stale entries from before + * the flash operation must be discarded */ + invalidate_dcache(); + invalidate_icache(); +} + +static void RAMFUNCTION hal_flash_unlock_sector(uint32_t sector) { /* AMD unlock sequence */ FLASH_IO8_WRITE(sector, FLASH_UNLOCK_ADDR1, AMD_CMD_UNLOCK_START); FLASH_IO8_WRITE(sector, FLASH_UNLOCK_ADDR2, AMD_CMD_UNLOCK_ACK); } -/* wait for toggle to stop and status mask to be met within microsecond timeout */ -static int hal_flash_status_wait(uint32_t sector, uint16_t mask, +/* wait for toggle to stop and status mask to be met within microsecond timeout. + * RAMFUNCTION: executes from DDR while flash is in program/erase command mode. 
*/ +static int RAMFUNCTION hal_flash_status_wait(uint32_t sector, uint16_t mask, uint32_t timeout_us) { int ret = 0; @@ -682,7 +745,7 @@ static int hal_flash_status_wait(uint32_t sector, uint16_t mask, #endif if (read1 == read2 && ((read1 & mask) == mask)) break; - udelay(1); + ram_udelay(1); } while (timeout++ < timeout_us); if (timeout >= timeout_us) { ret = -1; /* timeout */ @@ -694,7 +757,7 @@ static int hal_flash_status_wait(uint32_t sector, uint16_t mask, return ret; } -int hal_flash_write(uint32_t address, const uint8_t *data, int len) +int RAMFUNCTION hal_flash_write(uint32_t address, const uint8_t *data, int len) { int ret = 0; uint32_t i, pos, sector, offset, xfer, nwords; @@ -708,6 +771,9 @@ int hal_flash_write(uint32_t address, const uint8_t *data, int len) data, address, len); #endif + /* Disable flash caching — AMD commands must reach the chip directly */ + hal_flash_cache_disable(); + pos = 0; while (len > 0) { /* determine sector address */ @@ -754,10 +820,13 @@ int hal_flash_write(uint32_t address, const uint8_t *data, int len) address += xfer; len -= xfer; } + + /* Restore flash caching — flash is back in read-array mode */ + hal_flash_cache_enable(); return ret; } -int hal_flash_erase(uint32_t address, int len) +int RAMFUNCTION hal_flash_erase(uint32_t address, int len) { int ret = 0; uint32_t sector; @@ -766,6 +835,9 @@ int hal_flash_erase(uint32_t address, int len) if (address >= FLASH_BASE_ADDR) address -= FLASH_BASE_ADDR; + /* Disable flash caching — AMD commands must reach the chip directly */ + hal_flash_cache_disable(); + while (len > 0) { /* determine sector address */ sector = (address / FLASH_SECTOR_SIZE); @@ -792,10 +864,13 @@ int hal_flash_erase(uint32_t address, int len) address += FLASH_SECTOR_SIZE; len -= FLASH_SECTOR_SIZE; } + + /* Restore flash caching — flash is back in read-array mode */ + hal_flash_cache_enable(); return ret; } -void hal_flash_unlock(void) +void RAMFUNCTION hal_flash_unlock(void) { /* Per-sector unlock is done 
in hal_flash_write/erase before each operation. * The previous non-volatile PPB protection mode (C0h) approach caused @@ -818,8 +893,9 @@ extern uint32_t _spin_table[]; extern uint32_t _spin_table_addr; extern uint32_t _bootpg_addr; -/* Startup additional cores with spin table and synchronize the timebase */ -static void hal_mp_up(uint32_t bootpg) +/* Startup additional cores with spin table and synchronize the timebase. + * spin_table_ddr: DDR address of the spin table (for checking status) */ +static void hal_mp_up(uint32_t bootpg, uint32_t spin_table_ddr) { uint32_t all_cores, active_cores, whoami; int timeout = 50, i; @@ -829,7 +905,7 @@ static void hal_mp_up(uint32_t bootpg) active_cores = (1 << whoami); /* current running cores */ wolfBoot_printf("MP: Starting cores (boot page %p, spin table %p)\n", - bootpg, (uint32_t)_spin_table); + bootpg, spin_table_ddr); /* Set the boot page translation register */ set32(LCC_BSTRH, 0); @@ -849,8 +925,8 @@ static void hal_mp_up(uint32_t bootpg) /* wait for other core(s) to start */ while (timeout) { for (i = 0; i < CPU_NUMCORES; i++) { - uint32_t* entry = (uint32_t*)( - (uint8_t*)_spin_table + (i * ENTRY_SIZE) + ENTRY_ADDR_LOWER); + volatile uint32_t* entry = (volatile uint32_t*)( + spin_table_ddr + (i * ENTRY_SIZE) + ENTRY_ADDR_LOWER); if (*entry) { active_cores |= (1 << i); } @@ -881,7 +957,7 @@ static void hal_mp_up(uint32_t bootpg) static void hal_mp_init(void) { uint32_t *fixup = (uint32_t*)&_secondary_start_page; - uint32_t bootpg; + uint32_t bootpg, second_half_ddr, spin_table_ddr; int i_tlb = 0; /* always 0 */ size_t i; const volatile uint32_t *s; @@ -893,31 +969,60 @@ static void hal_mp_init(void) * size to ensure bootpg fits in 32 bits and is accessible. */ bootpg = DDR_ADDRESS + 0x80000000UL - BOOT_ROM_SIZE; - /* Store the boot page address for use by additional CPU cores */ - _bootpg_addr = (uint32_t)&_second_half_boot_page; + /* Second half boot page (spin loop + spin table) goes just below. 
+ * For XIP flash builds, .bootmp is in flash — secondary cores can't + * write to flash, so the spin table MUST be in DDR. */ + second_half_ddr = bootpg - BOOT_ROM_SIZE; - /* Store location of spin table for other cores */ - _spin_table_addr = (uint32_t)_spin_table; + /* DDR addresses for second half symbols */ + spin_table_ddr = second_half_ddr + + ((uint32_t)_spin_table - (uint32_t)&_second_half_boot_page); - /* Flush bootpg before copying to invalidate any stale cache lines */ + /* Flush DDR destination before copying */ flush_cache(bootpg, BOOT_ROM_SIZE); + flush_cache(second_half_ddr, BOOT_ROM_SIZE); - /* Map reset page to bootpg so we can copy code there */ + /* Map reset page to bootpg so we can copy code there. + * Boot page translation will redirect secondary core fetches from + * 0xFFFFF000 to bootpg in DDR. */ disable_tlb1(i_tlb); set_tlb(1, i_tlb, BOOT_ROM_ADDR, bootpg, 0, /* tlb, epn, rpn, urpn */ (MAS3_SX | MAS3_SW | MAS3_SR), (MAS2_I | MAS2_G), /* perms, wimge */ 0, BOOKE_PAGESZ_4K, 1); /* ts, esel, tsize, iprot */ - /* copy startup code to virtually mapped boot address */ - /* do not use memcpy due to compiler array bounds report (not valid) */ + /* Copy first half (startup code) to DDR via BOOT_ROM_ADDR mapping. + * Uses cache-inhibited TLB to ensure data reaches DDR immediately. */ s = (const uint32_t*)fixup; d = (uint32_t*)BOOT_ROM_ADDR; for (i = 0; i < BOOT_ROM_SIZE/4; i++) { d[i] = s[i]; } - /* start core and wait for it to be enabled */ - hal_mp_up(bootpg); + /* Write _bootpg_addr and _spin_table_addr into the DDR first-half copy. + * These variables are .long 0 in the linked .bootmp (flash), and direct + * stores to their flash addresses silently fail on XIP builds. + * Calculate offsets within the boot page and write via BOOT_ROM_ADDR. 
*/ + { + volatile uint32_t *bp = (volatile uint32_t*)(BOOT_ROM_ADDR + + ((uint32_t)&_bootpg_addr - (uint32_t)&_secondary_start_page)); + volatile uint32_t *st = (volatile uint32_t*)(BOOT_ROM_ADDR + + ((uint32_t)&_spin_table_addr - (uint32_t)&_secondary_start_page)); + *bp = second_half_ddr; + *st = spin_table_ddr; + } + + /* Copy second half (spin loop + spin table) directly to DDR. + * Master has DDR TLB (entry 12, MAS2_M). Flush cache after copy + * to ensure secondary cores see the data. */ + s = (const uint32_t*)&_second_half_boot_page; + d = (uint32_t*)second_half_ddr; + for (i = 0; i < BOOT_ROM_SIZE/4; i++) { + d[i] = s[i]; + } + flush_cache(second_half_ddr, BOOT_ROM_SIZE); + + /* start cores and wait for them to be enabled */ + hal_mp_up(bootpg, spin_table_ddr); } #endif /* ENABLE_MP */ diff --git a/src/boot_ppc_mp.S b/src/boot_ppc_mp.S index 3b62c40e88..bd6af369fe 100644 --- a/src/boot_ppc_mp.S +++ b/src/boot_ppc_mp.S @@ -110,8 +110,8 @@ branch_prediction: srwi r10, r0, 5 /* r10 = cluster */ mulli r5, r10, CORES_PER_CLUSTER - add r5, r5, r8 - mulli r4, r5, CORES_PER_CLUSTER + add r5, r5, r8 /* r5 = linear core ID */ + mr r4, r5 /* r4 = PIR = linear core ID */ #elif defined(CORE_E500MC) /* BOOKE e500mc family */ rlwinm r4, r0, 27, 27, 31 mr r5, r4 @@ -132,6 +132,15 @@ branch_prediction: mtspr L1CSR2, r8 #if defined(CORE_E6500) /* --- L2 E6500 --- */ +ccsr_tlb_mp: + /* e6500 L2 uses memory-mapped CCSR registers (L2_CLUSTER_BASE). + * Secondary cores have no TLBs on entry — only the boot page + * translation provides initial access. Add a temporary CCSR + * mapping (TLB1 entry 2) so L2 setup can access the registers. 
*/ + set_tlb(1, 2, + CCSRBAR, CCSRBAR, CCSRBAR_PHYS_HIGH, + MAS3_SW | MAS3_SR, MAS2_I | MAS2_G, 0, + CCSRBAR_SIZE, 0, r11) l2_setup_cache: /* E6500CORERM: 11.7 L2 cache state */ /* R5 = L2 cluster 1 base */ From 0edb1dc6bc4a0cd2f7ffd46eb4ddc0a3aaf6b39e Mon Sep 17 00:00:00 2001 From: David Garske Date: Thu, 19 Feb 2026 16:14:17 -0800 Subject: [PATCH 11/11] Progress with IFC Flash testing --- config/examples/nxp-t2080.config | 7 ++-- hal/nxp_t2080.c | 61 ++++++++++++++++++++++++++------ hal/nxp_t2080.h | 2 +- 3 files changed, 55 insertions(+), 15 deletions(-) diff --git a/config/examples/nxp-t2080.config b/config/examples/nxp-t2080.config index 9393efef31..f5d20b9d54 100644 --- a/config/examples/nxp-t2080.config +++ b/config/examples/nxp-t2080.config @@ -1,8 +1,4 @@ # NXP T2080 wolfBoot Configuration Template -# -# Stock (default): Compact layout, NOR base 0xEFFE0000 -# NAII 68PPC2 (alternate): Larger app partition, NOR base 0xE8000000 -# Uncomment the "# NAII 68PPC2:" lines and comment the stock lines to use. ARCH=PPC TARGET=nxp_t2080 @@ -59,3 +55,6 @@ WOLFBOOT_DTS_BOOT_ADDRESS?=0xE8040000 WOLFBOOT_DTS_UPDATE_ADDRESS?=0xE8050000 # DTS Load to RAM Address WOLFBOOT_LOAD_DTS_ADDRESS?=0x200000 + +# Optional IFC NOR flash test (erase/write/read on update partition) +#CFLAGS_EXTRA+=-DTEST_FLASH diff --git a/hal/nxp_t2080.c b/hal/nxp_t2080.c index d1fd27fb23..bb88377766 100644 --- a/hal/nxp_t2080.c +++ b/hal/nxp_t2080.c @@ -217,9 +217,10 @@ static void hal_flash_init(void) wolfBoot_printf("IFC CSPR0: 0x%x%s\n", cspr, (cspr & IFC_CSPR_WP) ? " (WP set)" : ""); #endif - if (cspr & IFC_CSPR_WP) { - set32(IFC_CSPR(0), cspr & ~IFC_CSPR_WP); - } + /* WP clearing is done in hal_flash_clear_wp() from RAMFUNCTION code. + * T2080RM requires V=0 before modifying IFC_CSPR, which is not safe + * during XIP. The RAMFUNCTION code runs from DDR with flash TLB + * guarded, so it can safely toggle V=0 -> modify -> V=1.
*/ /* Note: hal_flash_getid() is disabled because AMD Autoselect mode * affects the entire flash bank. Since wolfBoot runs XIP from the same @@ -715,6 +716,25 @@ static void RAMFUNCTION hal_flash_cache_enable(void) invalidate_icache(); } +/* Clear IFC write-protect. T2080RM says IFC_CSPR should only be written + * when V=0. Must be called from RAMFUNCTION (DDR) with flash TLB set to + * guarded (MAS2_G) so no speculative access occurs while V is briefly 0. */ +static void RAMFUNCTION hal_flash_clear_wp(void) +{ + uint32_t cspr = get32(IFC_CSPR(0)); + if (cspr & IFC_CSPR_WP) { + /* Clear V first, then modify WP, then re-enable V */ + set32(IFC_CSPR(0), cspr & ~(IFC_CSPR_WP | IFC_CSPR_V)); + __asm__ __volatile__("sync; isync"); + set32(IFC_CSPR(0), (cspr & ~IFC_CSPR_WP) | IFC_CSPR_V); + __asm__ __volatile__("sync; isync"); + /* Verify WP cleared */ + cspr = get32(IFC_CSPR(0)); + wolfBoot_printf("WP clear: CSPR0=0x%x%s\n", cspr, + (cspr & IFC_CSPR_WP) ? " (FAILED)" : " (OK)"); + } +} + static void RAMFUNCTION hal_flash_unlock_sector(uint32_t sector) { /* AMD unlock sequence */ @@ -773,6 +793,12 @@ int RAMFUNCTION hal_flash_write(uint32_t address, const uint8_t *data, int len) /* Disable flash caching — AMD commands must reach the chip directly */ hal_flash_cache_disable(); + hal_flash_clear_wp(); + + /* Reset flash to read-array mode in case previous operation left it + * in command mode (e.g. 
after a timeout or incomplete operation) */ + FLASH_IO8_WRITE(0, 0, AMD_CMD_RESET); + ram_udelay(50); pos = 0; while (len > 0) { @@ -792,18 +818,15 @@ int RAMFUNCTION hal_flash_write(uint32_t address, const uint8_t *data, int len) hal_flash_unlock_sector(sector); FLASH_IO8_WRITE(sector, offset, AMD_CMD_WRITE_TO_BUFFER); - #if FLASH_CFI_WIDTH == 16 - FLASH_IO16_WRITE(sector, offset, (nwords-1)); - #else + /* Word count (N-1) must be replicated to both chips */ FLASH_IO8_WRITE(sector, offset, (nwords-1)); - #endif for (i=0; i 0) { /* determine sector address */ @@ -854,10 +884,21 @@ int RAMFUNCTION hal_flash_erase(uint32_t address, int len) /* block erase timeout = 50us - for additional sectors */ /* Typical is 200ms (max 1100ms) */ + /* Debug: check if flash entered erase mode (DQ6 should toggle) */ + { + uint16_t r1 = FLASH_IO8_READ(sector, 0); + uint16_t r2 = FLASH_IO8_READ(sector, 0); + wolfBoot_printf("Erase cmd sent: read 0x%x, 0x%x %s\n", + r1, r2, (r1 != r2) ? "(toggling)" : "(NOT toggling)"); + } + /* poll for erase completion - max 1.1 sec */ ret = hal_flash_status_wait(sector, 0x4C, 1100*1000); if (ret != 0) { - wolfBoot_printf("Flash Erase: Timeout at sector %d\n", sector); + uint16_t r1 = FLASH_IO8_READ(sector, 0); + wolfBoot_printf("Flash Erase: Timeout at sector %d (status 0x%x)\n", + sector, r1); + FLASH_IO8_WRITE(sector, 0, AMD_CMD_RESET); break; } diff --git a/hal/nxp_t2080.h b/hal/nxp_t2080.h index ee219c34ad..4be04b7187 100644 --- a/hal/nxp_t2080.h +++ b/hal/nxp_t2080.h @@ -131,7 +131,7 @@ enum ifc_amask_sizes { /* ---- NOR Flash ---- */ #define FLASH_BANK_SIZE (128*1024*1024) -#define FLASH_PAGE_SIZE (1024) /* program buffer */ +#define FLASH_PAGE_SIZE (512) /* program buffer (256 bytes per chip x 2 chips) */ #define FLASH_SECTOR_SIZE (128*1024) #define FLASH_SECTORS (FLASH_BANK_SIZE / FLASH_SECTOR_SIZE) #define FLASH_CFI_WIDTH 16 /* 8 or 16 */