0

I have an 8x16 matrix of bits as a UINT8 matrix[16].

I want to transpose the matrix and store it as a UINT16 matrix2[8].

This is in a time-critical piece of my code, so I need to do this as fast as possible. Is there a clever way to achieve this on a MIPS processor?

Rocketmagnet
  • 5,656
  • 8
  • 36
  • 47

2 Answers

0

Maybe something like this:

  # Transpose the 16-row x 8-column bit matrix at `matrix` (one row per byte)
  # into 8 halfwords at `transposed`:
  #   transposed[c] bit (15 - r) = matrix[r] bit c
  # i.e. row 0 ends up in the most significant bit of every output halfword.
  #
  # Register roles:
  #   $10-$25  matrix[0] .. matrix[15], loaded once up front
  #   $2       current column / output index, counts 7 down to 0
  #   $9       single-bit mask selecting column $2
  #   $8       output halfword being assembled
  #   $3       scratch: extracted bit, then byte offset for the store
  #
  # $26/$27 ($k0/$k1) are deliberately NOT used: they are reserved for the
  # kernel and may be overwritten by any interrupt or exception.
  # NOTE(review): $16-$23 are callee-saved under the usual MIPS calling
  # convention -- save/restore them if this is called from compiled code.
  # NOTE(review): the label-addressed loads/stores are presumably expanded by
  # the assembler using $1 ($at) -- confirm for your toolchain.

  lbu $10, matrix
  lbu $11, matrix+1
  lbu $12, matrix+2
  lbu $13, matrix+3
  lbu $14, matrix+4
  lbu $15, matrix+5
  lbu $16, matrix+6
  lbu $17, matrix+7
  lbu $18, matrix+8
  lbu $19, matrix+9
  lbu $20, matrix+10
  lbu $21, matrix+11
  lbu $22, matrix+12
  lbu $23, matrix+13
  lbu $24, matrix+14
  lbu $25, matrix+15

  addiu $2, $0, 8            # decremented at loop top, so columns run 7..0
  addiu $9, $0, 256          # shifted at loop top, so masks run 0x80..0x01
loop:
  addiu $2, $2, -1           # next column
  srl $9, $9, 1              # mask = 1 << column
  addu $8, $0, $0            # start a fresh output halfword

  # Row 0: isolate the column bit, move it to bit 0, seed the accumulator.
  and $3, $10, $9
  srlv $3, $3, $2
  or $8, $8, $3

  # Rows 1..15: same extraction, shifting the accumulator left first so
  # earlier rows migrate towards the most significant bit.
  and $3, $11, $9
  srlv $3, $3, $2
  sll $8, $8, 1
  or $8, $8, $3

  and $3, $12, $9
  srlv $3, $3, $2
  sll $8, $8, 1
  or $8, $8, $3

  and $3, $13, $9
  srlv $3, $3, $2
  sll $8, $8, 1
  or $8, $8, $3

  and $3, $14, $9
  srlv $3, $3, $2
  sll $8, $8, 1
  or $8, $8, $3

  and $3, $15, $9
  srlv $3, $3, $2
  sll $8, $8, 1
  or $8, $8, $3

  and $3, $16, $9
  srlv $3, $3, $2
  sll $8, $8, 1
  or $8, $8, $3

  and $3, $17, $9
  srlv $3, $3, $2
  sll $8, $8, 1
  or $8, $8, $3

  and $3, $18, $9
  srlv $3, $3, $2
  sll $8, $8, 1
  or $8, $8, $3

  and $3, $19, $9
  srlv $3, $3, $2
  sll $8, $8, 1
  or $8, $8, $3

  and $3, $20, $9
  srlv $3, $3, $2
  sll $8, $8, 1
  or $8, $8, $3

  and $3, $21, $9
  srlv $3, $3, $2
  sll $8, $8, 1
  or $8, $8, $3

  and $3, $22, $9
  srlv $3, $3, $2
  sll $8, $8, 1
  or $8, $8, $3

  and $3, $23, $9
  srlv $3, $3, $2
  sll $8, $8, 1
  or $8, $8, $3

  and $3, $24, $9
  srlv $3, $3, $2
  sll $8, $8, 1
  or $8, $8, $3

  and $3, $25, $9
  srlv $3, $3, $2
  sll $8, $8, 1
  or $8, $8, $3

  sll $3, $2, 1              # halfword index -> byte offset
  sh $8, transposed($3)
  # Loop only while the column index is still positive. Branching on ">= 0"
  # here would run a ninth pass with index -1 and store a halfword at
  # transposed-2, outside the output buffer.
  bgtz $2, loop
  nop                        # delay-slot filler after the branch


.data 0x2000
# Sample input: 16 rows of 8 bits, one row per byte. Each consecutive pair of
# rows sets a single column, stepping from bit 7 down to bit 0.
matrix:
.byte 0x80, 0x80, 0x40, 0x40
.byte 0x20, 0x20, 0x10, 0x10
.byte 0x08, 0x08, 0x04, 0x04
.byte 0x02, 0x02, 0x01, 0x01

.data 0x3000
# Output buffer: 8 halfwords, zero-initialised; filled by the `sh` store in
# the transpose loop.
transposed:
.half 0, 0, 0, 0, 0, 0, 0, 0

It reads the input matrix into registers and then performs a loop 8 times (once for each row of the transposed matrix).

gusbro
  • 22,357
  • 35
  • 46
0

I don't think that there are any special instructions in the MIPS instruction set that would help with this, so you might just as well code it in C. You could create a user-defined instruction if you have access to the processor RTL...

markgz
  • 6,054
  • 1
  • 19
  • 41