1

I am working with an 8051 microcontroller and have found a better way to transfer data to shift registers. Until now, I used the bit-banging technique shifting bits out one at a time starting with the most significant bit. The new method is done with the hardware serial port, however I learned that data is shifted out starting with the least significant bit (totally opposite direction).

To comply, I'll have to re-arrange my data. Currently, the data size in question is 16-bits, and I might also have a 24-bit data size I need to convert as well.

When I did convert only an 8-bit size, I used a lookup table of only 256 bytes. That's about 1/16th of my available code space since I'm cramming all my code onto an AT89C4051 micro.

If I tried the same with a 16-bit size, I'd probably go over the limit since 256 times 256 equals 65K (is that how big the lookup table needs to be?)

So I'm looking for a high-speed algorithm to do the job well.

Here's what I came up with so far in 8051 code. I think it is a bit too slow and hard to maintain, but it seems to do the job:

; Reverse the bit order of the 16-bit value in DPTR, one bit per pass.
; In:  DPTR = 16-bit number to convert
; Out: DPTR = same number with bit 0 <-> bit 15, bit 1 <-> bit 14, ...
; Overwrites A, R2, R3, R7 (R7 ends at 0) and the carry flag.
; R2/R3 need no initialisation: 16 left shifts push out whatever they held.

mov R7,#10h ; pass counter: 10h = 16, one pass per bit
nextbit:
mov A,DPH
rrc A ; shift high byte right; its bit 0 drops into carry
mov DPH,A
mov A,DPL
rrc A ; that carry enters DPL bit 7; DPL bit 0 drops into carry
mov DPL,A
mov A,R2 ; register moves leave carry untouched, so the bit survives
rlc A ; bit taken from DPL enters R2 bit 0; R2 bit 7 drops into carry
mov R2,A
mov A,R3
rlc A ; R2's old bit 7 enters R3 bit 0, making R3:R2 a 16-bit left shift
mov R3,A
djnz R7,nextbit ; djnz does not alter carry
mov DPH,R3 ; copy the reversed value R3:R2 back into DPTR
mov DPL,R2

Can anyone point me to faster code or even a better algorithm I can use rather than shifting bits in and out one by one?

Visually, this is how I'm trying to sort my binary bits. Assume "a" needs to go from the lowest bit position to the highest bit position in the 16-bit number and "b" needs to go from the 2nd lowest bit position to the 2nd highest, and so on. Here is the example:

 ponmlkji:hgfedcba -> abcdefgh:ijklmnop
Mike -- No longer here
  • 2,064
  • 1
  • 15
  • 37
  • @DavidWohlferd: The instruction set of a 8051 is extremely limited. Most instructions are executed on the accumulator, for example the bit shift commands. – user5329483 Feb 04 '18 at 07:33
  • Any time you have a separable problem where what happens to one bit doesn't affect other bits, you can chop up your input and use a small LUT. Sometimes each chunk can use the same LUT (like here), but even if not, that would be 256 + 256 LUT entries instead of 256 * 256, for a problem like GaloisField16 multiplication where the high byte needs a different LUT than the low byte. – Peter Cordes Feb 04 '18 at 09:22

1 Answers1

3

At least you can transfer two bits in each loop iteration, which halves the loop count:

; Reverse the bit order of the 16-bit value in DPTR, two bits per pass.
; Each input byte is reversed on its own (DPH into R2, DPL into R3) and the
; two bytes are swapped when the result is copied back.
; In:  DPTR = 16-bit number to convert
; Out: DPTR = bit-reversed number
; Overwrites A, R2, R3, R7 (R7 ends at 0) and the carry flag.
mov R7,#8 ; pass counter: 8 passes, each moving one bit of DPH and one of DPL
nextbit:

  mov A,DPH
  rrc A ; bit 0 of DPH drops into carry
  mov DPH,A
  mov A,R2 ; R2 collects reversed DPH = low byte of the result
  rlc A ; carry enters R2 bit 0
  mov R2,A

  mov A,DPL
  rrc A ; bit 0 of DPL drops into carry
  mov DPL,A
  mov A,R3 ; R3 collects reversed DPL = high byte of the result
  rlc A ; carry enters R3 bit 0
  mov R3,A

djnz R7,nextbit
mov DPH,R3 ; reversed low byte becomes the new high byte
mov DPL,R2 ; reversed high byte becomes the new low byte

Or with a lookup table of 256 entries (256 bytes):

; Reverse the bit order of the 16-bit value in DPTR with a 256-byte table.
; In:  DPTR = 16-bit number to convert
; Out: R2 = high byte of result, A = low byte of result
;      (the result is NOT written back to DPTR)
; Overwrites DPTR, which is left pointing at LookupTable256.
; LookupTable256 is defined elsewhere, in code memory (MOVC reads code space);
; entry i must hold byte i with its bit order reversed.
mov  R2, DPH                 ; save input high byte before DPTR is reused
mov  A, DPL                  ; input low byte becomes the first table index
mov  DPTR, #LookupTable256   ; '#' loads the table ADDRESS; the 8051 has no MOV DPTR,direct form
movc A, @A+DPTR              ; A = reversed input low byte = result high byte
xch  A, R2                   ; R2 = result high byte, A = input high byte
movc A, @A+DPTR              ; A = reversed input high byte = result low byte
; low byte in A, high byte in R2

Here is a version which translates nibble-wise and uses a lookup table with just 16 entries:

; Reverse the bit order of the 16-bit value in DPTR, one nibble at a time,
; with a 16-byte table.
; In:  DPTR = 16-bit number to convert
; Out: R3 = high byte of result, A = low byte of result
;      (the result is NOT written back to DPTR)
; Overwrites R2, DPTR (left pointing at LookupTable16) and the flags.
; LookupTable16 is defined elsewhere, in code memory; entry i must hold the
; 4-bit value i with its bit order reversed, in the low nibble:
;   db 00h,08h,04h,0Ch,02h,0Ah,06h,0Eh,01h,09h,05h,0Dh,03h,0Bh,07h,0Fh
; Input nibbles are numbered 0 (bits 3..0) to 3 (bits 15..12); reversed
; nibble n ends up as result nibble 3-n.
; AR3 is the direct (absolute) address of R3 in the active register bank;
; NOTE(review): with Keil A51 this symbol depends on the USING setting -
; confirm it matches the bank selected at run time.
mov  R2, DPH                 ; save both input bytes before DPTR is reused
mov  R3, DPL
mov  DPTR, #LookupTable16    ; '#' loads the table ADDRESS (only MOV DPTR,#data16 exists)

mov  A, R3
anl  A, #0Fh                 ; = input nibble 0 (8051 logical AND is ANL)
movc A, @A+DPTR              ; translate
swap A                       ; move it to the high nibble
xch  A, R3                   ; R3 = result nibble 3, A = input low byte
swap A
anl  A, #0Fh                 ; = input nibble 1
movc A, @A+DPTR              ; translate
orl  AR3, A                  ; R3 = result nibbles 3+2 (result high byte)

mov  A, R2
anl  A, #0Fh                 ; = input nibble 2
movc A, @A+DPTR              ; translate
swap A                       ; move it to the high nibble
xch  A, R2                   ; R2 = result nibble 1, A = input high byte
swap A
anl  A, #0Fh                 ; = input nibble 3
movc A, @A+DPTR              ; translate
orl  A, R2                   ; A = result nibbles 1+0 (result low byte)
user5329483
  • 1,260
  • 7
  • 11