@@ -1021,143 +1021,93 @@ void Adafruit_NeoPixel::show(void) {
1021
1021
1022
1022
// Arduino 101 -----------------------------------------------------------
1023
1023
1024
- PinDescription *pindesc = &g_APinDescription[pin];
1025
- uint8_t *p = pixels, *end = p + numBytes;
1026
- register uint8_t pix, mask;
1024
+ #define NOPx7 { __builtin_arc_nop (); \
1025
+ __builtin_arc_nop (); __builtin_arc_nop (); \
1026
+ __builtin_arc_nop (); __builtin_arc_nop (); \
1027
+ __builtin_arc_nop (); __builtin_arc_nop (); }
1027
1028
1029
+ PinDescription *pindesc = &g_APinDescription[pin];
1030
+ register uint32_t loop = 8 * numBytes; // one loop to handle all bytes and all bits
1031
+ register uint8_t *p = pixels;
1032
+ register uint32_t currByte = (uint32_t ) (*p);
1033
+ register uint32_t currBit = 0x80 & currByte;
1034
+ register uint32_t bitCounter = 0 ;
1035
+ register uint32_t first = 1 ;
1036
+
1037
+ // The loop is unusual. The very first iteration holds the wire LOW the whole time -
1038
+ // a constant LOW does not affect the NeoPixel, so there is no visible effect.
1039
+ // During that very first iteration the CPU caches the instructions in the loop.
1040
+ // Because of the caching process, the "CPU slows down". The NeoPixel pulse is very
1041
+ // time sensitive, so we let the CPU cache first and start regular pulses from the 2nd iteration.
1028
1042
if (pindesc->ulGPIOType == SS_GPIO) {
1029
1043
register uint32_t reg = pindesc->ulGPIOBase + SS_GPIO_SWPORTA_DR;
1030
- register uint32_t reg_val = __builtin_arc_lr ((volatile uint32_t )reg);
1044
+ uint32_t reg_val = __builtin_arc_lr ((volatile uint32_t )reg);
1031
1045
register uint32_t reg_bit_high = reg_val | (1 << pindesc->ulGPIOId );
1032
1046
register uint32_t reg_bit_low = reg_val & ~(1 << pindesc->ulGPIOId );
1033
1047
1034
- while (p < end) {
1035
- pix = *p++;
1036
- for (mask = 0x80 ; mask; mask >>= 1 ) {
1037
- __builtin_arc_sr (reg_bit_high, (volatile uint32_t )reg);
1038
- if (pix & mask) {
1039
- __builtin_arc_nop ();
1040
- __builtin_arc_nop ();
1041
- __builtin_arc_nop ();
1042
- __builtin_arc_nop ();
1043
- __builtin_arc_nop ();
1044
- __builtin_arc_nop ();
1045
- __builtin_arc_nop ();
1046
- __builtin_arc_nop ();
1047
- __builtin_arc_nop ();
1048
- __builtin_arc_nop ();
1049
- __builtin_arc_nop ();
1050
- __builtin_arc_nop ();
1051
- __builtin_arc_nop ();
1052
- __builtin_arc_nop ();
1053
- __builtin_arc_nop ();
1054
- __builtin_arc_nop ();
1055
- __builtin_arc_sr (reg_bit_low, (volatile uint32_t )reg);
1056
- __builtin_arc_nop ();
1057
- __builtin_arc_nop ();
1058
- __builtin_arc_nop ();
1059
- __builtin_arc_nop ();
1060
- __builtin_arc_nop ();
1061
- __builtin_arc_nop ();
1062
- __builtin_arc_nop ();
1063
- __builtin_arc_nop ();
1064
- } else {
1065
- __builtin_arc_nop ();
1066
- __builtin_arc_nop ();
1067
- __builtin_arc_nop ();
1068
- __builtin_arc_nop ();
1069
- __builtin_arc_nop ();
1070
- __builtin_arc_nop ();
1071
- __builtin_arc_nop ();
1072
- __builtin_arc_nop ();
1073
- __builtin_arc_nop ();
1074
- __builtin_arc_nop ();
1075
- __builtin_arc_sr (reg_bit_low, (volatile uint32_t )reg);
1076
- __builtin_arc_nop ();
1077
- __builtin_arc_nop ();
1078
- __builtin_arc_nop ();
1079
- __builtin_arc_nop ();
1080
- __builtin_arc_nop ();
1081
- __builtin_arc_nop ();
1082
- __builtin_arc_nop ();
1083
- __builtin_arc_nop ();
1084
- __builtin_arc_nop ();
1085
- __builtin_arc_nop ();
1086
- __builtin_arc_nop ();
1087
- __builtin_arc_nop ();
1088
- __builtin_arc_nop ();
1089
- __builtin_arc_nop ();
1090
- __builtin_arc_nop ();
1091
- __builtin_arc_nop ();
1092
- }
1048
+ loop += 1 ; // include first, special iteration
1049
+ while (loop--) {
1050
+ if (!first) {
1051
+ currByte <<= 1 ;
1052
+ bitCounter++;
1053
+ }
1054
+
1055
+ // 1 is >550ns high and >450ns low; 0 is 200..500ns high and >450ns low
1056
+ __builtin_arc_sr (first ? reg_bit_low : reg_bit_high, (volatile uint32_t )reg);
1057
+ if (currBit) { // ~400ns HIGH (740ns overall)
1058
+ NOPx7
1059
+ NOPx7
1060
+ }
1061
+ // ~340ns HIGH
1062
+ NOPx7
1063
+ __builtin_arc_nop ();
1064
+
1065
+ // 820ns LOW; per spec, the maximum allowed LOW time here is 5000ns
1066
+ __builtin_arc_sr (reg_bit_low, (volatile uint32_t )reg);
1067
+ NOPx7
1068
+ NOPx7
1069
+
1070
+ if (bitCounter >= 8 ) {
1071
+ bitCounter = 0 ;
1072
+ currByte = (uint32_t ) (*++p);
1093
1073
}
1074
+
1075
+ currBit = 0x80 & currByte;
1076
+ first = 0 ;
1094
1077
}
1095
1078
} else if (pindesc->ulGPIOType == SOC_GPIO) {
1096
1079
register uint32_t reg = pindesc->ulGPIOBase + SOC_GPIO_SWPORTA_DR;
1097
- register uint32_t reg_val = MMIO_REG_VAL (reg);
1080
+ uint32_t reg_val = MMIO_REG_VAL (reg);
1098
1081
register uint32_t reg_bit_high = reg_val | (1 << pindesc->ulGPIOId );
1099
1082
register uint32_t reg_bit_low = reg_val & ~(1 << pindesc->ulGPIOId );
1100
1083
1101
- while (p < end) {
1102
- pix = *p++;
1103
- for (mask = 0x80 ; mask; mask >>= 1 ) {
1104
- MMIO_REG_VAL (reg) = reg_bit_high;
1105
- if (pix & mask) {
1106
- __builtin_arc_nop ();
1107
- __builtin_arc_nop ();
1108
- __builtin_arc_nop ();
1109
- __builtin_arc_nop ();
1110
- __builtin_arc_nop ();
1111
- __builtin_arc_nop ();
1112
- __builtin_arc_nop ();
1113
- __builtin_arc_nop ();
1114
- __builtin_arc_nop ();
1115
- __builtin_arc_nop ();
1116
- __builtin_arc_nop ();
1117
- __builtin_arc_nop ();
1118
- __builtin_arc_nop ();
1119
- __builtin_arc_nop ();
1120
- __builtin_arc_nop ();
1121
- __builtin_arc_nop ();
1122
- MMIO_REG_VAL (reg) = reg_bit_low;
1123
- __builtin_arc_nop ();
1124
- __builtin_arc_nop ();
1125
- __builtin_arc_nop ();
1126
- __builtin_arc_nop ();
1127
- __builtin_arc_nop ();
1128
- __builtin_arc_nop ();
1129
- __builtin_arc_nop ();
1130
- __builtin_arc_nop ();
1131
- } else {
1132
- __builtin_arc_nop ();
1133
- __builtin_arc_nop ();
1134
- __builtin_arc_nop ();
1135
- __builtin_arc_nop ();
1136
- __builtin_arc_nop ();
1137
- __builtin_arc_nop ();
1138
- __builtin_arc_nop ();
1139
- __builtin_arc_nop ();
1140
- __builtin_arc_nop ();
1141
- __builtin_arc_nop ();
1142
- MMIO_REG_VAL (reg) = reg_bit_low;
1143
- __builtin_arc_nop ();
1144
- __builtin_arc_nop ();
1145
- __builtin_arc_nop ();
1146
- __builtin_arc_nop ();
1147
- __builtin_arc_nop ();
1148
- __builtin_arc_nop ();
1149
- __builtin_arc_nop ();
1150
- __builtin_arc_nop ();
1151
- __builtin_arc_nop ();
1152
- __builtin_arc_nop ();
1153
- __builtin_arc_nop ();
1154
- __builtin_arc_nop ();
1155
- __builtin_arc_nop ();
1156
- __builtin_arc_nop ();
1157
- __builtin_arc_nop ();
1158
- __builtin_arc_nop ();
1159
- }
1084
+ loop += 1 ; // include first, special iteration
1085
+ while (loop--) {
1086
+ if (!first) {
1087
+ currByte <<= 1 ;
1088
+ bitCounter++;
1160
1089
}
1090
+ MMIO_REG_VAL (reg) = first ? reg_bit_low : reg_bit_high;
1091
+ if (currBit) { // ~430ns HIGH (740ns overall)
1092
+ NOPx7
1093
+ NOPx7
1094
+ __builtin_arc_nop ();
1095
+ }
1096
+ // ~310ns HIGH
1097
+ NOPx7
1098
+
1099
+ // 850ns LOW; per spec, the maximum allowed LOW time here is 5000ns
1100
+ MMIO_REG_VAL (reg) = reg_bit_low;
1101
+ NOPx7
1102
+ NOPx7
1103
+
1104
+ if (bitCounter >= 8 ) {
1105
+ bitCounter = 0 ;
1106
+ currByte = (uint32_t ) (*++p);
1107
+ }
1108
+
1109
+ currBit = 0x80 & currByte;
1110
+ first = 0 ;
1161
1111
}
1162
1112
}
1163
1113
0 commit comments