0

I have a circuit which needs to respond in around 0.5uS to an external interrupt. I built the circuit with an STM32F031K6 and a 20MHz oscillator set to run on the 2x PLL, giving a 40MHz clock. I was surprised to see that although one clock cycle would be 25nS, i could only toggle a pin at 300nS - im not exactly sure why it takes so long, i have some experience with 8 bit AVRs and although I wouldn't expect it to run in one clock cycle, 12 seems slow. The external interrupt takes 3uS to respond. how can i choose a chip to meet my requirement of 0.5uS?

I'm just assuming that i need to change the chip, if anyone has advice on how i might reduce the response time that would also be great

my full code is here, this is a blank program generated by cube, i stripped out some of the generated commenting to make it easier to read



int main(void)
{
  MX_GPIO_Init();
  MX_ADC_Init();

  while (1)
  {
  }
}

void SystemClock_Config(void)
{
  RCC_OscInitTypeDef RCC_OscInitStruct = {0};
  RCC_ClkInitTypeDef RCC_ClkInitStruct = {0};

  /** Initializes the CPU, AHB and APB busses clocks 
  */
  RCC_OscInitStruct.OscillatorType = RCC_OSCILLATORTYPE_HSI14|RCC_OSCILLATORTYPE_HSE;
  RCC_OscInitStruct.HSEState = RCC_HSE_ON;
  RCC_OscInitStruct.HSI14State = RCC_HSI14_ON;
  RCC_OscInitStruct.HSI14CalibrationValue = 16;
  RCC_OscInitStruct.PLL.PLLState = RCC_PLL_ON;
  RCC_OscInitStruct.PLL.PLLSource = RCC_PLLSOURCE_HSE;
  RCC_OscInitStruct.PLL.PLLMUL = RCC_PLL_MUL2;
  RCC_OscInitStruct.PLL.PREDIV = RCC_PREDIV_DIV1;
  if (HAL_RCC_OscConfig(&RCC_OscInitStruct) != HAL_OK)
  {
    Error_Handler();
  }
  /** Initializes the CPU, AHB and APB busses clocks 
  */
  RCC_ClkInitStruct.ClockType = RCC_CLOCKTYPE_HCLK|RCC_CLOCKTYPE_SYSCLK
                              |RCC_CLOCKTYPE_PCLK1;
  RCC_ClkInitStruct.SYSCLKSource = RCC_SYSCLKSOURCE_PLLCLK;
  RCC_ClkInitStruct.AHBCLKDivider = RCC_SYSCLK_DIV1;
  RCC_ClkInitStruct.APB1CLKDivider = RCC_HCLK_DIV1;

  if (HAL_RCC_ClockConfig(&RCC_ClkInitStruct, FLASH_LATENCY_1) != HAL_OK)
  {
    Error_Handler();
  }
}

static void MX_ADC_Init(void)
{
  ADC_ChannelConfTypeDef sConfig = {0};

  /** Configure the global features of the ADC (Clock, Resolution, Data Alignment and number of conversion) 
  */
  hadc.Instance = ADC1;
  hadc.Init.ClockPrescaler = ADC_CLOCK_ASYNC_DIV1;
  hadc.Init.Resolution = ADC_RESOLUTION_12B;
  hadc.Init.DataAlign = ADC_DATAALIGN_RIGHT;
  hadc.Init.ScanConvMode = ADC_SCAN_DIRECTION_FORWARD;
  hadc.Init.EOCSelection = ADC_EOC_SINGLE_CONV;
  hadc.Init.LowPowerAutoWait = DISABLE;
  hadc.Init.LowPowerAutoPowerOff = DISABLE;
  hadc.Init.ContinuousConvMode = DISABLE;
  hadc.Init.DiscontinuousConvMode = DISABLE;
  hadc.Init.ExternalTrigConv = ADC_SOFTWARE_START;
  hadc.Init.ExternalTrigConvEdge = ADC_EXTERNALTRIGCONVEDGE_NONE;
  hadc.Init.DMAContinuousRequests = DISABLE;
  hadc.Init.Overrun = ADC_OVR_DATA_PRESERVED;
  if (HAL_ADC_Init(&hadc) != HAL_OK)
  {
    Error_Handler();
  }
  /** Configure for the selected ADC regular channel to be converted. 
  */
  sConfig.Channel = ADC_CHANNEL_0;
  sConfig.Rank = ADC_RANK_CHANNEL_NUMBER;
  sConfig.SamplingTime = ADC_SAMPLETIME_1CYCLE_5;
  if (HAL_ADC_ConfigChannel(&hadc, &sConfig) != HAL_OK)
  {
    Error_Handler();
  }
  /** Configure for the selected ADC regular channel to be converted. 
  */
  sConfig.Channel = ADC_CHANNEL_1;
  if (HAL_ADC_ConfigChannel(&hadc, &sConfig) != HAL_OK)
  {
    Error_Handler();
  }
}

static void MX_GPIO_Init(void)
{
  GPIO_InitTypeDef GPIO_InitStruct = {0};

  /* GPIO Ports Clock Enable */
  __HAL_RCC_GPIOF_CLK_ENABLE();
  __HAL_RCC_GPIOA_CLK_ENABLE();
  __HAL_RCC_GPIOB_CLK_ENABLE();

  /*Configure GPIO pin Output Level */
  HAL_GPIO_WritePin(GPIOB, GPIO_PIN_1|GPIO_PIN_3|GPIO_PIN_4|GPIO_PIN_5 
                          |GPIO_PIN_6|GPIO_PIN_7, GPIO_PIN_RESET);

  /*Configure GPIO pins : PA2 PA11 */
  GPIO_InitStruct.Pin = GPIO_PIN_2|GPIO_PIN_11;
  GPIO_InitStruct.Mode = GPIO_MODE_IT_RISING_FALLING;
  GPIO_InitStruct.Pull = GPIO_NOPULL;
  HAL_GPIO_Init(GPIOA, &GPIO_InitStruct);

  /*Configure GPIO pins : PA3 PA4 PA12 PA15 */
  GPIO_InitStruct.Pin = GPIO_PIN_3|GPIO_PIN_4|GPIO_PIN_12|GPIO_PIN_15;
  GPIO_InitStruct.Mode = GPIO_MODE_INPUT;
  GPIO_InitStruct.Pull = GPIO_NOPULL;
  HAL_GPIO_Init(GPIOA, &GPIO_InitStruct);

  /*Configure GPIO pins : PA6 PA7 */
  GPIO_InitStruct.Pin = GPIO_PIN_6|GPIO_PIN_7;
  GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
  GPIO_InitStruct.Pull = GPIO_NOPULL;
  GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
  GPIO_InitStruct.Alternate = GPIO_AF1_TIM3;
  HAL_GPIO_Init(GPIOA, &GPIO_InitStruct);

  /*Configure GPIO pin : PB0 */
  GPIO_InitStruct.Pin = GPIO_PIN_0;
  GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
  GPIO_InitStruct.Pull = GPIO_NOPULL;
  GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
  GPIO_InitStruct.Alternate = GPIO_AF1_TIM3;
  HAL_GPIO_Init(GPIOB, &GPIO_InitStruct);

  /*Configure GPIO pins : PB1 PB3 PB4 PB5 
                           PB6 PB7 */
  GPIO_InitStruct.Pin = GPIO_PIN_1|GPIO_PIN_3|GPIO_PIN_4|GPIO_PIN_5 
                          |GPIO_PIN_6|GPIO_PIN_7;
  GPIO_InitStruct.Mode = GPIO_MODE_OUTPUT_PP;
  GPIO_InitStruct.Pull = GPIO_NOPULL;
  GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
  HAL_GPIO_Init(GPIOB, &GPIO_InitStruct);

  /*Configure GPIO pins : PA8 PA9 PA10 */
  GPIO_InitStruct.Pin = GPIO_PIN_8|GPIO_PIN_9|GPIO_PIN_10;
  GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
  GPIO_InitStruct.Pull = GPIO_NOPULL;
  GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
  GPIO_InitStruct.Alternate = GPIO_AF2_TIM1;
  HAL_GPIO_Init(GPIOA, &GPIO_InitStruct);

  /* EXTI interrupt init*/
  HAL_NVIC_SetPriority(EXTI2_3_IRQn, 0, 0);
  HAL_NVIC_EnableIRQ(EXTI2_3_IRQn);

  HAL_NVIC_SetPriority(EXTI4_15_IRQn, 0, 0);
  HAL_NVIC_EnableIRQ(EXTI4_15_IRQn);

}

void Error_Handler(void)
{
}

#ifdef  USE_FULL_ASSERT
/**
  * @brief  Reports the name of the source file and the source line number
  *         where the assert_param error has occurred.
  * @param  file: pointer to the source file name
  * @param  line: assert_param error line source number
  * @retval None
  */
void assert_failed(uint8_t *file, uint32_t line)
{ 
  /* USER CODE BEGIN 6 */
  /* User can add his own implementation to report the file name and line number,
     tex: printf("Wrong parameters value: file %s on line %d\r\n", file, line) */
  /* USER CODE END 6 */
}
#endif /* USE_FULL_ASSERT */

void EXTI2_3_IRQHandler(void)
{
  HAL_GPIO_EXTI_IRQHandler(GPIO_PIN_2);
}

void EXTI4_15_IRQHandler(void)
{
  HAL_GPIO_EXTI_IRQHandler(GPIO_PIN_11);
  GPIOB->ODR ^= 1<<1;
}
prune
  • 77
  • 1
  • 9
  • Can you specify, what kind of response to the external interrupt you expect? We need to roughly estimate the number of instructions / cycles needed to fulfill your requirements in order to select an appropriate MCU. – Blue Jul 03 '20 at 21:30
  • 0.5uS or as quick as possible – prune Jul 04 '20 at 13:17
  • Why your toggle code take 300us is hard to tell without seeing the code. You should at least disassemble it (or view the disassembly in your debugger) to see what instructions the compiler generates, and you may get better results by using compiler optimisation if you haven't already. If you want to toggle a pin at high (and deterministic) frequency, you should one of the available timer peripherals. But nonetheless that is a different question than that of interrupt latency - you risk distracting from your question by even mentioning that - post a new question if that remains a concern. – Clifford Jul 04 '20 at 17:15
  • Same goes for the interrupt latency - show the code, especially how you are measuring the timing, and inspect the disassembly. – Clifford Jul 04 '20 at 17:17
  • @prun: I understood the 0.5 us requirement. I assume, that measuring this reaction time starts with the edge of the signal triggering the external interrupt. But what needs to be done by the MCU in reaction to this signal? Do you just want to enter the ISR in 0.5 us or is there more to do like toggeling an GPIO etc. ? – Blue Jul 04 '20 at 20:54
  • @Blue yes i would like to enter the ISR in about 0.5uS, there is a little more to do but I think I can do it in assembly with relatively little code. At that moment I would really like to know i am getting into the ISR as fast as is possible – prune Jul 05 '20 at 08:57

2 Answers2

3

First of all, I recommend having a look at this ARM blog post for an in-depth introduction to interrupt latency of ARM Cortex-M processors.

As mentioned by @Colin the interrupt latency of a STM32F0 MCU with a Cortex-M0 core is 16 clock cycles starting when the signal on the EXTI line is asserted until entering the IRQ Handler with code reacting to the event. This clock cycle count cannot be reduced by firmware. When selecting a MCU with a Cortex-M3 or M4 core (e.g. a STM32F3), this required number of clock cycles drops to 12. The resulting latency still depends on the clock frequency of the core. Selecting a STM32 MCU with higher max. clock rate allows for faster reaction times:

  • STM32F0: up to 48 MHz Cortex M0 => ISR enter latency 333 ns
  • STM32G0: up to 64 MHz Cortex M0+ => ISR enter latency 234 ns
  • STM32F3: up to 72 Mhz Cortex M4 => ISR enter latency 166 ns
  • STM32G4: up to 170 Mhz Cortex M4 => ISR enter latency 70 ns

These calculation do not solve your problem though, because we have to consider as well what happens after the MCU entered the service routine. Several things come to my mind here:

  1. Backup of additional registers depending on the complexity of the ISR
  2. Code for an application specific reaction to the event (e.g. toggling an GPIO)
  3. Wait-states for FLASH / RAM / Peripheral accesses. The higher the core clock, the more wait-states are typically needed because external parts are clocked at lower frequency.
  4. Code for acknowledging / clearing the interrupt request

The last point can be postponed behind the application specific response an thus does not necessarily count to the reaction time, but all other points can have a significant impact. In order to fulfill your requirement with a cost-efficient STM32 MCU (I suppose you have selected the STM32F0 for this reason) you need to have good control over the number of instructions in the ISR. I do not recommend to use assembler here, but you should not rely on the CubeMX HAL implementation.

Since you say you need only little code in the ISR lets do a quick estimation:

  1. Saving two additional registers on the stack => 2 instructions
  2. Toggle a GPIO with a read-modify-write sequence => 3 instructions

Let's assume that each instruction takes 2 cycles, we need another 10 clock cycles. Using this best-case scenario we can have a look at our list with rather low-cost STM32 MCU's again:

  • STM32F0: 16 + 10 cycles at 48 MHz => 541 ns
  • STM32G0: 15 + 10 cycles at 64 MHz => 390 ns
  • STM32F3: 12 + 10 cycles at 72 MHz => 300 ns
  • STM32G4: 12 + 10 cycles at 170 MHz => 130 ns

With these numbers, a Cortex M0/M0+ looks not like the right choice. You will better go for a M3/M4 core with at least 64 MHz clock rate. I think the new G4 could be a good solution.

Anyway, I strongly recommend evaluating the real-world performance with the real-world requirement, since there are too many factors that can affect above latency calculations.

Blue
  • 820
  • 4
  • 17
0

The Cortex-M processors push a stack frame on exception entry, for your Cortex M0 the minimum time from assertion of the exception to running the first instruction of the interrupt handler is 16 clock cycles (assuming zero wait state memory).

The only way to make this take less time is to use a higher clock speed.

Colin
  • 3,394
  • 1
  • 21
  • 29
  • ok, 16 cycles at 25ns is 400nS, so that sounds like it's the right ballpark. I'm using STM32cubeIDE and I haven't yet figured out how to view disassembly, but is there an obvious reason that might account for this difference? – prune Jul 03 '20 at 15:43
  • im running a blank program - nothing in the main loop, only in the interrupt handler which reads void EXTI4_15_IRQHandler(void) { HAL_GPIO_EXTI_IRQHandler(GPIO_PIN_11); GPIOB->ODR ^= 1<<1; } i don't know what the HAL_GPIO_EXTI_IRQHandler is, cubemx put it there – prune Jul 03 '20 at 16:21
  • That’s how the event gets propagated through the library, you could remove the function call if you don’t need it, also you can use the GPIOs BSRR register to avoid a read modify write of the ODR – Colin Jul 03 '20 at 19:23
  • is it possible to run the interrupt without any context saving? – prune Jul 03 '20 at 21:14
  • do you know a way i can see exactly what is happening - how much time its taking to respond and how much time its taking for context saving? I would like to know why its so much slower than the 16 cycles suggested – prune Jul 04 '20 at 11:05
  • What does your clock configuration code look like? Is it definitely at 40 MHz? Is the peripheral bus running at the same speed? Have you removed the call to HAL_GPIO...? – Colin Jul 04 '20 at 11:46
  • i posted the clock settings above in my original comment so you can see them. If i take out the call to HAL_GPIO it doesn't seem to work but ill keep trying... – prune Jul 04 '20 at 13:33
  • @prune `^=` implies a read-modify-write. Perhaps: `GPIOB->ODR = x; x ^= 0x02;` so the pin is toggled as early as possible in the ISR. The HAL is built for comfort not for speed; `HAL_GPIO_EXTI_IRQHandle` will be called from the `EXTI4_15_IRQHandler` - you might also do better to use an EXTI that is not on a shared interrupt - except your part does not have one; if you are not using other EXTI lines, implement the `EXTI4_15_IRQHandler` handler directly to avoid the overhead of determining the interrupt source and a function call overhead - but mostly avoid the HAL (or CuibeMX at all) – Clifford Jul 04 '20 at 17:41
  • @prune ... if you want nano-second orders or performance, don't use generated or library code is my advice. But mostly unless you show your code it is hard to advise your PLL configuration code is not the issue. – Clifford Jul 04 '20 at 17:44
  • @Clifford I have posted the full program code above, its a blank program except for the setup generated by cube. re. the EXT14_15_IRQhandler, i can only use one interrupt on it but it doesn't seem to work if i comment that line out – prune Jul 05 '20 at 09:00