/* sp.c
 *
 * Copyright (C) 2006-2025 wolfSSL Inc.
 *
 * This file is part of wolfSSL.
 *
 * wolfSSL is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * wolfSSL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
 */

/* Implementation by Sean Parkinson. */

#include <wolfssl/wolfcrypt/libwolfssl_sources.h>

#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
    defined(WOLFSSL_HAVE_SP_ECC)

#include <wolfssl/wolfcrypt/cpuid.h>
#ifdef NO_INLINE
    #include <wolfssl/wolfcrypt/misc.h>
#else
    #define WOLFSSL_MISC_INCLUDED
    #include <wolfcrypt/src/misc.c>
#endif

/* Builds that define RSA_LOW_MEM always get WOLFSSL_SP_SMALL as well. */
#ifdef RSA_LOW_MEM
#ifndef WOLFSSL_SP_SMALL
#define WOLFSSL_SP_SMALL
#endif
#endif

/* WOLFSSL_SP_SMALL_STACK is (re)defined whenever WOLFSSL_SMALL_STACK is set
 * and WOLFSSL_SP_NO_MALLOC is not. It selects the heap based variants of the
 * SP_*_VAR macros below. This is done before sp.h is included - presumably so
 * that the header sees the same setting (confirm against sp.h).
 */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
#undef WOLFSSL_SP_SMALL_STACK
#define WOLFSSL_SP_SMALL_STACK
#endif

#include <wolfssl/wolfcrypt/sp.h>

/* IAR compiler: map the __asm__ / __volatile__ spellings used by the inline
 * assembly in this file onto asm / volatile, and define
 * WOLFSSL_NO_VAR_ASSIGN_REG.
 */
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__        asm
#define __volatile__   volatile
#define WOLFSSL_NO_VAR_ASSIGN_REG
#endif /* __IAR_SYSTEMS_ICC__ */
/* Keil compiler: map __asm__ / __volatile__ onto __asm / volatile. */
#ifdef __KEIL__
#define __asm__        __asm
#define __volatile__   volatile
#endif

/* Helpers for declaring, allocating and releasing temporary arrays.
 *
 * With WOLFSSL_SP_SMALL_STACK the array lives on the heap (XMALLOC/XFREE);
 * otherwise it is a fixed size local array and the alloc/free steps expand
 * to nothing.
 */
#ifdef WOLFSSL_SP_SMALL_STACK
    /* Declare NAME as a TYPE pointer initialised to NULL. CNT is unused. */
    #define SP_DECL_VAR(TYPE, NAME, CNT)                                \
        TYPE* NAME = NULL
    /* Allocate CNT elements of TYPE into NAME, but only when the caller's
     * variable 'err' is MP_OKAY. On allocation failure 'err' is set to
     * MEMORY_E. Requires 'err' to be in scope at the point of use.
     * Note: expands to a bare 'if' statement (no do/while wrapper), so take
     * care when using it as the body of an unbraced if/else.
     */
    #define SP_ALLOC_VAR(TYPE, NAME, CNT, HEAP, DT)                     \
        if (err == MP_OKAY) {                                           \
            (NAME) = (TYPE*)XMALLOC(sizeof(TYPE) * (CNT), (HEAP), DT);  \
            if ((NAME) == NULL) {                                       \
                err = MEMORY_E;                                         \
            }                                                           \
        }

    /* True when NAME holds an allocation (is not NULL). */
    #define SP_VAR_OK(NAME)          ((NAME) != NULL)

    /* Release NAME with XFREE. */
    #define SP_FREE_VAR(NAME, HEAP, DT)                                 \
        XFREE(NAME, (HEAP), DT)
    /* Wipe CNT elements of NAME with ForceZero (when NAME is not NULL) and
     * then release it.
     */
    #define SP_ZEROFREE_VAR(TYPE, NAME, CNT, HEAP, DT)                  \
        do {                                                            \
            if ((NAME) != NULL) {                                       \
                ForceZero(NAME, sizeof(TYPE) * (CNT));                  \
            }                                                           \
            SP_FREE_VAR(NAME, HEAP, DT);                                \
        } while (0)
    /* As SP_ZEROFREE_VAR, but the memory is wiped through FZ_NAME (when it
     * is not NULL) while NAME is the pointer that is released.
     */
    #define SP_ZEROFREE_VAR_ALT(TYPE, NAME, FZ_NAME, CNT, HEAP, DT)     \
        do {                                                            \
            if ((FZ_NAME) != NULL) {                                    \
                ForceZero(FZ_NAME, sizeof(TYPE) * (CNT));               \
            }                                                           \
            SP_FREE_VAR(NAME, HEAP, DT);                                \
        } while (0)
#else
    /* Declare NAME as a local array of CNT elements of TYPE. */
    #define SP_DECL_VAR(TYPE, NAME, CNT)                                \
        TYPE NAME[CNT]
    /* Nothing to allocate for a local array. */
    #define SP_ALLOC_VAR(TYPE, NAME, CNT, HEAP, DT)                     \
        WC_DO_NOTHING
    /* A local array is always available. */
    #define SP_VAR_OK(NAME)          (1)
    /* Nothing to release for a local array. */
    #define SP_FREE_VAR(NAME, HEAP, DT)                                 \
        WC_DO_NOTHING
    /* Wipe CNT elements of NAME with ForceZero; nothing is released.
     * NOTE(review): when NAME is the array declared by SP_DECL_VAR the NULL
     * test always passes - presumably kept to match the heap variant.
     */
    #define SP_ZEROFREE_VAR(TYPE, NAME, CNT, HEAP, DT)                  \
        do {                                                            \
            if ((NAME) != NULL) {                                       \
                ForceZero(NAME, sizeof(TYPE) * (CNT));                  \
            }                                                           \
        } while (0)
    /* Wipe CNT elements through FZ_NAME when it is not NULL; NAME is not
     * touched and nothing is released.
     */
    #define SP_ZEROFREE_VAR_ALT(TYPE, NAME, FZ_NAME, CNT, HEAP, DT)     \
        do {                                                            \
            if ((FZ_NAME) != NULL) {                                    \
                ForceZero(FZ_NAME, sizeof(TYPE) * (CNT));               \
            }                                                           \
        } while (0)
#endif

#ifdef WOLFSSL_SP_ARM_THUMB_ASM
/* Debug helper: print a multi-word number to stderr as "<name>=0x<hex>\n".
 *
 * Words are printed from index ((bits + 31) / 32) - 1 down to 0, each with
 * SP_PRINT_FMT (defined elsewhere). 'name' must be a string literal as it is
 * concatenated with the format text. The 'total' and 'words' arguments are
 * not used by this implementation.
 */
#define SP_PRINT_NUM(var, name, total, words, bits)         \
    do {                                                    \
        int ii;                                             \
        fprintf(stderr, name "=0x");                        \
        for (ii = (((bits) + 31) / 32) - 1; ii >= 0; ii--)  \
            fprintf(stderr, SP_PRINT_FMT, (var)[ii]);       \
        fprintf(stderr, "\n");                              \
    } while (0)

/* Debug helper: print a single value to stderr as "<name>=0x<hex>\n" using
 * SP_PRINT_FMT. 'name' must be a string literal.
 */
#define SP_PRINT_VAL(var, name)                             \
    fprintf(stderr, name "=0x" SP_PRINT_FMT "\n", var)

/* Debug helper: print an int to stderr as "<name>=<decimal>\n". 'name' must
 * be a string literal.
 */
#define SP_PRINT_INT(var, name)                             \
    fprintf(stderr, name "=%d\n", var)

#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
#ifndef WOLFSSL_SP_NO_2048
/* Read big endian unsigned byte array into r.
 *
 * Bytes are consumed from the end of a (least significant byte first) and
 * packed four to a digit, so r[0] receives the least significant word. Any
 * digits of r that are not filled from a are set to zero.
 *
 * At most size digits are written. When a holds more data than fits, only
 * the least significant size digits are stored and the remaining most
 * significant bytes are ignored, so r is never written past its end.
 *
 * r     A single precision integer.
 * size  Number of sp_digit words available in r.
 * a     Byte array.
 * n     Number of bytes in array to read.
 */
static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
{
    int i;
    int j = 0;

    /* Whole words: four bytes at a time, working back from the last byte. */
    for (i = n - 1; (i >= 3) && (j < size); i -= 4) {
        r[j]  = ((sp_digit)a[i - 0] <<  0) |
                ((sp_digit)a[i - 1] <<  8) |
                ((sp_digit)a[i - 2] << 16) |
                ((sp_digit)a[i - 3] << 24);
        j++;
    }

    /* One to three leading bytes, a[0]..a[i], make up a final partial word.
     * Built with shifts so the result does not depend on host byte order.
     */
    if ((i >= 0) && (j < size)) {
        sp_digit top = 0;
        int k;

        for (k = 0; k <= i; k++) {
            top = (top << 8) | (sp_digit)a[k];
        }
        r[j] = top;
        j++;
    }

    /* Clear the unused high digits. */
    for (; j < size; j++) {
        r[j] = 0;
    }
}

/* Convert an mp_int to an array of sp_digit.
 *
 * The digits of a are repacked into 32-bit words, least significant first.
 * Words of r beyond the value of a, up to size, are set to zero.
 *
 * r     A single precision integer.
 * size  Number of sp_digit words of r to fill.
 * a     A multi-precision integer.
 */
static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
{
#if DIGIT_BIT == 32
    /* Digits are the same width: masked copy. The loop always runs size
     * times and selects with a mask rather than branching on a->used.
     * remain starts at -used and counts up; its top bit is set while there
     * are digits of a still to copy (assumes sp_digit is an unsigned 32-bit
     * type - TODO confirm against sp.h).
     */
    sp_digit remain = (sp_digit)0 - (sp_digit)a->used;
    int src = 0;
    int dst = 0;

    while (dst < size) {
        sp_digit keep = (sp_digit)0 - (remain >> 31);

        r[dst] = a->dp[src] & keep;
        remain++;
        /* Only step the source index while more digits remain. */
        src += (int)(remain >> 31);
        dst++;
    }
#elif DIGIT_BIT > 32
    /* mp digits are wider than 32 bits: split each into several words. */
    unsigned int src;
    int dst = 0;
    word32 shift = 0;

    r[0] = 0;
    for (src = 0; src < (unsigned int)a->used && dst < size; src++) {
        /* Top up the current word with the low bits of this digit. */
        r[dst] |= ((sp_digit)a->dp[src] << shift);
        r[dst] &= 0xffffffff;
        shift = 32U - shift;
        if (dst + 1 >= size) {
            break;
        }
        dst++;
        /* lint allow cast of mismatch word32 and mp_digit */
        r[dst] = (sp_digit)(a->dp[src] >> shift); /*lint !e9033*/
        /* Emit further whole words while the digit still has 32 bits left. */
        while ((shift + 32U) <= (word32)DIGIT_BIT) {
            shift += 32U;
            r[dst] &= 0xffffffff;
            if (dst + 1 >= size) {
                break;
            }
            dst++;
            /* lint allow cast of mismatch word32 and mp_digit */
            r[dst] = (shift < (word32)DIGIT_BIT) ?
                     (sp_digit)(a->dp[src] >> shift) : /*lint !e9033*/
                     (sp_digit)0;
        }
        shift = (word32)DIGIT_BIT - shift;
    }

    /* Zero the words above the last one written. */
    dst++;
    while (dst < size) {
        r[dst] = 0;
        dst++;
    }
#else
    /* mp digits are narrower than 32 bits: gather several into each word. */
    unsigned int src;
    int dst = 0;
    int shift = 0;

    r[0] = 0;
    for (src = 0; src < (unsigned int)a->used && dst < size; src++) {
        r[dst] |= ((sp_digit)a->dp[src]) << shift;
        if (shift + DIGIT_BIT < 32) {
            /* Word not yet full: next digit goes above this one. */
            shift += DIGIT_BIT;
            continue;
        }

        /* Word is full: trim it and carry any spare bits into the next. */
        r[dst] &= 0xffffffff;
        if (dst + 1 >= size) {
            break;
        }
        shift = 32 - shift;
        dst++;
        if (shift == DIGIT_BIT) {
            /* Digit ended exactly on the word boundary. */
            r[dst] = 0;
            shift = 0;
        }
        else {
            r[dst] = a->dp[src] >> shift;
            shift = DIGIT_BIT - shift;
        }
    }

    /* Zero the words above the last one written. */
    dst++;
    while (dst < size) {
        r[dst] = 0;
        dst++;
    }
#endif
}

/* Write r as big endian to byte array.
 * Fixed length number of bytes written: 256
 *
 * The 64 words of r are emitted from r[63] down to r[0], each as four bytes
 * with the most significant byte first.
 *
 * r  A single precision integer.
 * a  Byte array.
 */
static void sp_2048_to_bin_64(sp_digit* r, byte* a)
{
    byte* out = a;
    int word;

    for (word = 64; word-- > 0; ) {
        out[0] = (byte)(r[word] >> 24);
        out[1] = (byte)(r[word] >> 16);
        out[2] = (byte)(r[word] >> 8);
        out[3] = (byte)(r[word] >> 0);
        out += 4;
    }
}

#if (defined(WOLFSSL_HAVE_SP_RSA) && (!defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(WOLFSSL_SP_SMALL))) || defined(WOLFSSL_HAVE_SP_DH)
/* Normalize the values in each word to 32 bits.
 *
 * Expands to nothing in this implementation - presumably because every
 * digit is already a full 32-bit word (confirm against sp.h).
 *
 * a  Array of sp_digit to normalize.
 */
#define sp_2048_norm_64(a)

#endif /* (WOLFSSL_HAVE_SP_RSA && (!WOLFSSL_RSA_PUBLIC_ONLY || !WOLFSSL_SP_SMALL)) || WOLFSSL_HAVE_SP_DH */
/* Normalize the values in each word to 32 bits.
 *
 * Unconditional definition of the same empty macro. When the conditional
 * definition above is also active the two replacement lists are identical
 * (both empty).
 *
 * a  Array of sp_digit to normalize.
 */
#define sp_2048_norm_64(a)

#ifndef WOLFSSL_SP_SMALL
#ifndef WOLFSSL_SP_LARGE_CODE
/* Multiply a and b into r. (r = a * b)
 *
 * Thumb inline assembly that multiplies two 8 word numbers into a 16 word
 * result. The result is produced one output word at a time: for the output
 * word at byte offset k (kept in r8) every product a[i] * b[j] with
 * 4 * (i + j) == k is added into a three register accumulator
 * (r3 = low, r4 = middle, r5 = high). Each 32x32 bit product is built from
 * four 16x16 bit multiplies. The result is assembled in the local array t
 * and copied to r at the end (presumably so that r may overlap a or b -
 * confirm with callers).
 *
 * r  A single precision integer. Receives 2 * 8 digits.
 * a  A single precision integer. 8 digits are read.
 * b  A single precision integer. 8 digits are read.
 */
SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit t[8 * 2];
    sp_digit* tmp = t;
    __asm__ __volatile__ (
        /* Setup: r3/r4 = accumulator low/middle (zero), r8 = output byte
         * offset k (zero), r11 = t, r9 = a, r10 = b, r12 = a + 32 which is
         * one past the last word of a.
         */
        "movs	r3, #0\n\t"
        "movs	r4, #0\n\t"
        "mov	r8, r3\n\t"
        "mov	r11, %[tmp]\n\t"
        "mov	r9, %[a]\n\t"
        "mov	r10, %[b]\n\t"
        "movs	r6, #32\n\t"
        "add	r6, r6, r9\n\t"
        "mov	r12, r6\n\t"
        "\n"
    /* Outer loop: one iteration per output word. */
    "L_sp_2048_mul_8_words_%=:\n\t"
        /* The tmp register is used as a constant zero for the carry adds
         * below; r5 = accumulator high word.
         */
        "movs	%[tmp], #0\n\t"
        "movs	r5, #0\n\t"
        /* Starting offset into a = max(k - 28, 0), computed without a
         * branch: subtract 28, turn the borrow into an all-ones/zero mask
         * (sbc then mvn) and AND it with the difference.
         */
        "movs	r6, #28\n\t"
        "mov	%[a], r8\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[a], %[a], r6\n\t"
#else
        "sub	%[a], %[a], r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r6\n\t"
#elif defined(__clang__)
        "sbcs	r6, r6\n\t"
#else
        "sbc	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "mvns	r6, r6\n\t"
#else
        "mvn	r6, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "ands	%[a], %[a], r6\n\t"
#elif defined(__clang__)
        "ands	%[a], r6\n\t"
#else
        "and	%[a], r6\n\t"
#endif
        /* Starting offset into b = k - (offset into a). */
        "mov	%[b], r8\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[b], %[b], %[a]\n\t"
#else
        "sub	%[b], %[b], %[a]\n\t"
#endif
        /* Turn both offsets into pointers. */
        "add	%[a], %[a], r9\n\t"
        "add	%[b], %[b], r10\n\t"
        "\n"
    /* Inner loop: add one 32x32 product into r5:r4:r3. */
    "L_sp_2048_mul_8_mul_%=:\n\t"
        "# Multiply Start\n\t"
        /* Halfword at offset 0 of each operand times each other (the low
         * halves on a little endian target); added at bit 0.
         */
        "ldrh	r6, [%[a]]\n\t"
        "ldrh	r7, [%[b]]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[tmp]\n\t"
#else
        "adc	r4, %[tmp]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[tmp]\n\t"
#else
        "adc	r5, %[tmp]\n\t"
#endif
        /* r6 (from the first ldrh of a) times the upper 16 bits of the b
         * word; the product is split and added at bit 16.
         */
        "ldr	r7, [%[b]]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r7, #16\n\t"
#else
        "lsr	r7, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[tmp]\n\t"
#else
        "adc	r5, %[tmp]\n\t"
#endif
        /* Upper 16 bits of the a word times upper 16 bits of the b word;
         * added at bit 32 (into r4 with carry into r5).
         */
        "ldr	r6, [%[a]]\n\t"
        "ldr	r7, [%[b]]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, #16\n\t"
#else
        "lsr	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r7, #16\n\t"
#else
        "lsr	r7, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[tmp]\n\t"
#else
        "adc	r5, %[tmp]\n\t"
#endif
        /* Upper 16 bits of the a word (still in r6) times the halfword at
         * offset 0 of b; the product is split and added at bit 16.
         */
        "ldrh	r7, [%[b]]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[tmp]\n\t"
#else
        "adc	r5, %[tmp]\n\t"
#endif
        "# Multiply Done\n\t"
        /* Next pair for this output word: step a up and b down a word. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[a], %[a], #4\n\t"
#else
        "add	%[a], %[a], #4\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[b], %[b], #4\n\t"
#else
        "sub	%[b], %[b], #4\n\t"
#endif
        /* Stop when a has run off its end (r12), otherwise keep going while
         * the a pointer has not passed a + k.
         */
        "cmp	%[a], r12\n\t"
        "beq	L_sp_2048_mul_8_done_mul_%=\n\t"
        "mov	r6, r8\n\t"
        "add	r6, r6, r9\n\t"
        "cmp	%[a], r6\n\t"
        "ble	L_sp_2048_mul_8_mul_%=\n\t"
        "\n"
    /* Output word complete: store it and shift the accumulator down. */
    "L_sp_2048_mul_8_done_mul_%=:\n\t"
        /* Restore the pointer to t and store the low word at offset k. */
        "mov	%[tmp], r11\n\t"
        "mov	r7, r8\n\t"
        "str	r3, [%[tmp], r7]\n\t"
        "movs	r3, r4\n\t"
        "movs	r4, r5\n\t"
        /* k += 4; repeat while k <= 56. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r7, r7, #4\n\t"
#else
        "add	r7, r7, #4\n\t"
#endif
        "mov	r8, r7\n\t"
        "movs	r6, #56\n\t"
        "cmp	r7, r6\n\t"
        "ble	L_sp_2048_mul_8_words_%=\n\t"
        /* Remaining accumulator word is the top result word (offset 60). */
        "str	r3, [%[tmp], r7]\n\t"
        /* Put the original pointer values back in the a and b operands. */
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        : [a] "+l" (a), [b] "+l" (b), [tmp] "+l" (tmp)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc"
    );

    /* Copy the assembled product out of the local buffer. */
    XMEMCPY(r, t, sizeof(t));
}

#else
/* Multiply a and b into r. (r = a * b)
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    __asm__ __volatile__ (
        "sub	sp, sp, #32\n\t"
        "mov	r8, %[r]\n\t"
        "mov	r9, %[a]\n\t"
        "mov	r10, %[b]\n\t"
        "movs	%[r], #0\n\t"
        "#  A[0] * B[0]\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r3, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r3, r6, r3\n\t"
#elif defined(__clang__)
        "muls	r3, r6\n\t"
#else
        "mul	r3, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r4, r6, #16\n\t"
#else
        "lsr	r4, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
        "movs	r5, #0\n\t"
        "str	r3, [sp]\n\t"
        "#  A[0] * B[1]\n\t"
        "movs	r3, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[1] * B[0]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "str	r4, [sp, #4]\n\t"
        "#  A[2] * B[0]\n\t"
        "movs	r4, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[1] * B[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[0] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "str	r5, [sp, #8]\n\t"
        "#  A[0] * B[3]\n\t"
        "movs	r5, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[1] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[2] * B[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[3] * B[0]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "str	r3, [sp, #12]\n\t"
        "#  A[4] * B[0]\n\t"
        "movs	r3, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[3] * B[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[2] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[1] * B[3]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[0] * B[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #16]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "str	r4, [sp, #16]\n\t"
        "#  A[0] * B[5]\n\t"
        "movs	r4, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #20]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[1] * B[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #16]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[2] * B[3]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[3] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[4] * B[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[5] * B[0]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "str	r5, [sp, #20]\n\t"
        "#  A[6] * B[0]\n\t"
        "movs	r5, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[5] * B[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[4] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[3] * B[3]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[2] * B[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #16]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[1] * B[5]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #20]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[0] * B[6]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #24]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "str	r3, [sp, #24]\n\t"
        "#  A[0] * B[7]\n\t"
        "movs	r3, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #28]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[1] * B[6]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #24]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[2] * B[5]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #20]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[3] * B[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b], #16]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[4] * B[3]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[5] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[6] * B[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[7] * B[0]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #28]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "str	r4, [sp, #28]\n\t"
        "#  A[7] * B[1]\n\t"
        "movs	r4, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #28]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[6] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[5] * B[3]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[4] * B[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b], #16]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[3] * B[5]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b], #20]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[2] * B[6]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #24]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[1] * B[7]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #28]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "mov	%[r], r8\n\t"
        "str	r5, [%[r], #32]\n\t"
        "movs	%[r], #0\n\t"
        "#  A[2] * B[7]\n\t"
        "movs	r5, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #28]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[3] * B[6]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b], #24]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[4] * B[5]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b], #20]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[5] * B[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "ldr	%[b], [%[b], #16]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[6] * B[3]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[7] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #28]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "mov	%[r], r8\n\t"
        "str	r3, [%[r], #36]\n\t"
        "movs	%[r], #0\n\t"
        "#  A[7] * B[3]\n\t"
        "movs	r3, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #28]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[6] * B[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "ldr	%[b], [%[b], #16]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[5] * B[5]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "ldr	%[b], [%[b], #20]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[4] * B[6]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b], #24]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[3] * B[7]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b], #28]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "mov	%[r], r8\n\t"
        "str	r4, [%[r], #40]\n\t"
        "movs	%[r], #0\n\t"
        "#  A[4] * B[7]\n\t"
        "movs	r4, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b], #28]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[5] * B[6]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "ldr	%[b], [%[b], #24]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[6] * B[5]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "ldr	%[b], [%[b], #20]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[7] * B[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #28]\n\t"
        "ldr	%[b], [%[b], #16]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "mov	%[r], r8\n\t"
        "str	r5, [%[r], #44]\n\t"
        "movs	%[r], #0\n\t"
        "#  A[7] * B[5]\n\t"
        "movs	r5, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #28]\n\t"
        "ldr	%[b], [%[b], #20]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[6] * B[6]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "ldr	%[b], [%[b], #24]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[5] * B[7]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "ldr	%[b], [%[b], #28]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "mov	%[r], r8\n\t"
        "str	r3, [%[r], #48]\n\t"
        "movs	%[r], #0\n\t"
        "#  A[6] * B[7]\n\t"
        "movs	r3, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "ldr	%[b], [%[b], #28]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[7] * B[6]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #28]\n\t"
        "ldr	%[b], [%[b], #24]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "mov	%[r], r8\n\t"
        "str	r4, [%[r], #52]\n\t"
        "movs	%[r], #0\n\t"
        "#  A[7] * B[7]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #28]\n\t"
        "ldr	%[b], [%[b], #28]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
        "mov	%[r], r8\n\t"
        "str	r5, [%[r], #56]\n\t"
        "str	r3, [%[r], #60]\n\t"
        "pop	{r3, r4, r5, r6}\n\t"
        "stm	%[r]!, {r3, r4, r5, r6}\n\t"
        "pop	{r3, r4, r5, r6}\n\t"
        "stm	%[r]!, {r3, r4, r5, r6}\n\t"
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc"
    );
}

#endif /* !WOLFSSL_SP_LARGE_CODE */
/* Add b to a into r. (r = a + b)
 *
 * Processes 8 words, two at a time, propagating the carry flag from one pair
 * to the next.
 *
 * r  A single precision integer. Receives the 8 word sum.
 * a  A single precision integer.
 * b  A single precision integer.
 *
 * returns the carry out of the most significant word (0 or 1).
 */
SP_NOINLINE static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    /* Each preprocessor group below holds alternative spellings of one
     * instruction; presumably these match the syntax accepted by the Keil,
     * clang and remaining assemblers respectively. */
    __asm__ __volatile__ (
        /* Words 0-1: plain add starts the carry chain. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Words 2-3: add with carry from the previous pair. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Words 4-5. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Words 6-7. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* All output words are stored, so the r register is reused for the
         * result: r = 0 + 0 + carry. This relies on the immediate move
         * leaving the carry flag from the last add intact. */
        "movs	%[r], #0\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	%[r], %[r], %[r]\n\t"
#elif defined(__clang__)
        "adcs	%[r], %[r]\n\t"
#else
        "adc	%[r], %[r]\n\t"
#endif
        /* r, a and b are in/out operands because the ldm/stm writeback
         * forms above modify the registers holding them. */
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "cc"
    );
    /* r now holds the carry value rather than an address. */
    return (word32)(size_t)r;
}

/* Add the single word b to a into r. (r = a + b)
 *
 * Processes 8 words of a, two at a time. b is added to the first word only;
 * the remaining words just absorb the propagated carry.
 *
 * r  A single precision integer. Receives the 8 word sum.
 * a  A single precision integer.
 * b  A single precision digit (one word) to add.
 *
 * returns the carry out of the most significant word (0 or 1).
 */
SP_NOINLINE static sp_digit sp_2048_add_word_8(sp_digit* r, const sp_digit* a,
        sp_digit b)
{
    /* Each preprocessor group below holds alternative spellings of one
     * instruction; presumably these match the syntax accepted by the Keil,
     * clang and remaining assemblers respectively. */
    __asm__ __volatile__ (
        /* r5 is a constant zero used as the addend when propagating carry. */
        "movs	r5, #0\n\t"
        /* Words 0-1: add b to the lowest word, then carry into the next. */
        "ldm	%[a]!, {r3, r4}\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, %[b]\n\t"
#else
        "add	r3, r3, %[b]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Words 2-3: add zero plus carry. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Words 4-5. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Words 6-7. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* All output words are stored, so the r register is reused for the
         * result: r = 0 + 0 + carry. This relies on the immediate move
         * leaving the carry flag from the last add intact. */
        "movs	%[r], #0\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	%[r], %[r], %[r]\n\t"
#elif defined(__clang__)
        "adcs	%[r], %[r]\n\t"
#else
        "adc	%[r], %[r]\n\t"
#endif
        /* r and a are modified by the ldm/stm writeback forms above; b is
         * only read by the assembly but is declared in/out as well. */
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r3", "r4", "r5", "cc"
    );
    /* r now holds the carry value rather than an address. */
    return (word32)(size_t)r;
}

/* Sub b from a into a. (a -= b)
 *
 * Processes 16 words, two at a time, propagating the borrow (carry flag)
 * from one pair to the next. Each pair of a is read, updated and written
 * back to the same location.
 *
 * a  A single precision integer. Overwritten with the 16 word difference.
 * b  A single precision integer.
 *
 * returns 0 when there is no borrow out of the most significant word and
 * a value with all bits set (-1) when there is.
 */
SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a,
        const sp_digit* b)
{
    /* Each preprocessor group below holds alternative spellings of one
     * instruction; presumably these match the syntax accepted by the Keil,
     * clang and remaining assemblers respectively. */
    __asm__ __volatile__ (
        /* Words 0-1: plain subtract starts the borrow chain. The words of a
         * are loaded without writeback so that the following stm stores to
         * the same address and only then advances a. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r2, r2, r4\n\t"
#else
        "sub	r2, r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Words 2-3: subtract with borrow from the previous pair. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Words 4-5. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Words 6-7. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Words 8-9. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Words 10-11. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Words 12-13. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Words 14-15. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* All words are stored, so the a register is reused for the result.
         * Subtracting a register from itself with borrow yields 0 when the
         * chain ended without a borrow and all ones otherwise. */
#ifdef WOLFSSL_KEIL
        "sbcs	%[a], %[a], %[a]\n\t"
#elif defined(__clang__)
        "sbcs	%[a], %[a]\n\t"
#else
        "sbc	%[a], %[a]\n\t"
#endif
        /* a and b are in/out operands because the ldm/stm writeback forms
         * above modify the registers holding them. */
        : [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r2", "r3", "r4", "r5", "cc"
    );
    /* a now holds the borrow mask rather than an address. */
    return (word32)(size_t)a;
}

/* Add b to a into r. (r = a + b)
 *
 * Fully unrolled 16-digit addition: digits are loaded two at a time from a
 * and b, added with the carry propagated through the flags, and stored to r.
 *
 * r  A single precision integer. Receives the 16-digit sum.
 * a  A single precision integer. 16 digits are read.
 * b  A single precision integer. 16 digits are read.
 *
 * Returns the carry out of the most significant digit (0 or 1).
 */
SP_NOINLINE static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    /* NOTE(review): the #else spellings (neither clang nor Keil) omit the
     * 's' suffix; presumably that assembler syntax makes these Thumb
     * instructions update the flags anyway - confirm against the toolchain.
     */
    __asm__ __volatile__ (
        /* Digits 0-1: the first add has no carry-in and starts the chain. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 2-3: from here on every add consumes the previous carry. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 4-5. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 6-7. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 8-9. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 10-11. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 12-13. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 14-15. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Turn the final carry into a value: r = 0, then r = r + r + carry.
         * The register that held the result pointer is reused for this. */
        "movs	%[r], #0\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	%[r], %[r], %[r]\n\t"
#elif defined(__clang__)
        "adcs	%[r], %[r]\n\t"
#else
        "adc	%[r], %[r]\n\t"
#endif
        /* All three pointers are read-write operands because the ldm/stm
         * writeback ('!') advances them. */
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "cc"
    );
    /* r no longer holds a pointer here; it carries the 0/1 carry value. */
    return (word32)(size_t)r;
}

/* Mask every digit of a with m and write the result to r. (r[i] = a[i] & m)
 *
 * r  A single precision integer. 8 digits are written.
 * a  A single precision integer. 8 digits are read.
 * m  Mask to AND against each digit.
 */
static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
    int k = 0;

    /* Compact form: walk the 8 digits from lowest to highest. */
    while (k < 8) {
        r[k] = m & a[k];
        k++;
    }
#else
    /* Straight-line form: one AND per digit, lowest index first. */
    r[0] = m & a[0];
    r[1] = m & a[1];
    r[2] = m & a[2];
    r[3] = m & a[3];
    r[4] = m & a[4];
    r[5] = m & a[5];
    r[6] = m & a[6];
    r[7] = m & a[7];
#endif
}

/* Multiply a and b into r. (r = a * b)
 *
 * One Karatsuba step: both operands are split into 8-digit halves and the
 * product is assembled from three 8-digit multiplications (low halves, high
 * halves, and the sums of the halves).
 *
 * r  A single precision integer. Digits r[0] .. r[31] are written.
 * a  A single precision integer. 16 digits are read.
 * b  A single precision integer. 16 digits are read.
 */
SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit mid[16];
    sp_digit sumA[8];
    sp_digit sumB[8];
    sp_digit* lo = r;
    sp_digit* hi = r + 16;
    sp_digit carryA;
    sp_digit carryB;
    sp_digit top;

    /* Sum the halves of each operand; the carry-outs are tracked apart from
     * the 8-digit sums. */
    carryA = sp_2048_add_8(sumA, a, a + 8);
    carryB = sp_2048_add_8(sumB, b, b + 8);
    top = carryA & carryB;

    /* Three half-size products. The high product is stored before the low
     * one, in the same order as the inputs are consumed. */
    sp_2048_mul_8(hi, a + 8, b + 8);
    sp_2048_mul_8(lo, a, b);
    sp_2048_mul_8(mid, sumA, sumB);

    /* Middle term: remove the low and high products, then add back the
     * cross terms that the dropped carry-outs of the half sums stand for.
     * Every carry/borrow result is accumulated into top. */
    top += sp_2048_sub_in_place_16(mid, lo);
    top += sp_2048_sub_in_place_16(mid, hi);
    sp_2048_mask_8(sumA, sumA, 0 - carryB);
    top += sp_2048_add_8(mid + 8, mid + 8, sumA);
    sp_2048_mask_8(sumB, sumB, 0 - carryA);
    top += sp_2048_add_8(mid + 8, mid + 8, sumB);

    /* Fold the middle term in at an 8-digit offset and push whatever
     * overflowed into the top 8 digits of the result. */
    top += sp_2048_add_16(r + 8, r + 8, mid);
    (void)sp_2048_add_word_8(r + 24, r + 24, top);
}

/* Add the single digit b to a into r. (r = a + b)
 *
 * Fully unrolled over 16 digits: b is added to the lowest digit and the
 * resulting carry is rippled through the remaining digits by adding zero.
 *
 * r  A single precision integer. Receives the 16-digit sum.
 * a  A single precision integer. 16 digits are read.
 * b  A single precision digit.
 *
 * Returns the carry out of the most significant digit (0 or 1).
 */
SP_NOINLINE static sp_digit sp_2048_add_word_16(sp_digit* r, const sp_digit* a,
        sp_digit b)
{
    /* NOTE(review): the #else spellings (neither clang nor Keil) omit the
     * 's' suffix; presumably that assembler syntax makes these Thumb
     * instructions update the flags anyway - confirm against the toolchain.
     */
    __asm__ __volatile__ (
        /* r5 stays zero for the whole routine: adc with it adds only the
         * carry. */
        "movs	r5, #0\n\t"
        /* Digits 0-1: b goes into digit 0 and starts the carry chain. */
        "ldm	%[a]!, {r3, r4}\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, %[b]\n\t"
#else
        "add	r3, r3, %[b]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 2-3. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 4-5. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 6-7. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 8-9. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 10-11. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 12-13. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 14-15. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Turn the final carry into a value: r = 0, then r = r + r + carry.
         * The register that held the result pointer is reused for this. */
        "movs	%[r], #0\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	%[r], %[r], %[r]\n\t"
#elif defined(__clang__)
        "adcs	%[r], %[r]\n\t"
#else
        "adc	%[r], %[r]\n\t"
#endif
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r3", "r4", "r5", "cc"
    );
    /* r no longer holds a pointer here; it carries the 0/1 carry value. */
    return (word32)(size_t)r;
}

/* Sub b from a into a. (a -= b)
 *
 * Fully unrolled 32-digit in-place subtraction, two digits per step, with the
 * borrow propagated through the carry flag.
 *
 * a  A single precision integer. 32 digits are read and overwritten.
 * b  A single precision integer. 32 digits are read.
 *
 * Returns 0 when there is no borrow out of the top digit, otherwise an
 * all-ones value (the final sbc computes a - a - borrow).
 */
SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
        const sp_digit* b)
{
    /* NOTE(review): the #else spellings (neither clang nor Keil) omit the
     * 's' suffix; presumably that assembler syntax makes these Thumb
     * instructions update the flags anyway - confirm against the toolchain.
     */
    __asm__ __volatile__ (
        /* Digits 0-1: the first subtract has no borrow-in. a is read with
         * plain ldr (no writeback) because the stm that stores the result
         * is what advances the pointer. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r2, r2, r4\n\t"
#else
        "sub	r2, r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Digits 2-3: from here on every subtract consumes the borrow. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Digits 4-5. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Digits 6-7. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Digits 8-9. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Digits 10-11. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Digits 12-13. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Digits 14-15. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Digits 16-17. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Digits 18-19. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Digits 20-21. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Digits 22-23. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Digits 24-25. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Digits 26-27. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Digits 28-29. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Digits 30-31. */
        "ldm	%[b]!, {r4, r5}\n\t"
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r4\n\t"
#elif defined(__clang__)
        "sbcs	r2, r4\n\t"
#else
        "sbc	r2, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
        "stm	%[a]!, {r2, r3}\n\t"
        /* Turn the final borrow into a value: a = a - a - borrow, i.e. 0 or
         * all ones. The register that held the pointer is reused for this. */
#ifdef WOLFSSL_KEIL
        "sbcs	%[a], %[a], %[a]\n\t"
#elif defined(__clang__)
        "sbcs	%[a], %[a]\n\t"
#else
        "sbc	%[a], %[a]\n\t"
#endif
        /* Both pointers are read-write operands because the ldm/stm
         * writeback ('!') advances them. */
        : [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r2", "r3", "r4", "r5", "cc"
    );
    /* a no longer holds a pointer here; it carries the borrow mask. */
    return (word32)(size_t)a;
}

/* Add b to a into r. (r = a + b)
 *
 * Fully unrolled 32-digit addition: digits are loaded two at a time from a
 * and b, added with the carry propagated through the flags, and stored to r.
 *
 * r  A single precision integer. Receives the 32-digit sum.
 * a  A single precision integer. 32 digits are read.
 * b  A single precision integer. 32 digits are read.
 *
 * Returns the carry out of the most significant digit (0 or 1).
 */
SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    /* NOTE(review): the #else spellings (neither clang nor Keil) omit the
     * 's' suffix; presumably that assembler syntax makes these Thumb
     * instructions update the flags anyway - confirm against the toolchain.
     */
    __asm__ __volatile__ (
        /* Digits 0-1: the first add has no carry-in and starts the chain. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 2-3: from here on every add consumes the previous carry. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 4-5. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 6-7. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 8-9. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 10-11. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 12-13. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 14-15. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 16-17. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 18-19. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 20-21. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 22-23. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 24-25. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 26-27. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 28-29. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 30-31. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Turn the final carry into a value: r = 0, then r = r + r + carry.
         * The register that held the result pointer is reused for this. */
        "movs	%[r], #0\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	%[r], %[r], %[r]\n\t"
#elif defined(__clang__)
        "adcs	%[r], %[r]\n\t"
#else
        "adc	%[r], %[r]\n\t"
#endif
        /* All three pointers are read-write operands because the ldm/stm
         * writeback ('!') advances them. */
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "cc"
    );
    /* r no longer holds a pointer here; it carries the 0/1 carry value. */
    return (word32)(size_t)r;
}

/* Mask every digit of a with m and write the result to r. (r[i] = a[i] & m)
 *
 * r  A single precision integer. 16 digits are written.
 * a  A single precision integer. 16 digits are read.
 * m  Mask to AND against each digit.
 */
static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
    int k = 0;

    /* Compact form: walk the 16 digits from lowest to highest. */
    while (k < 16) {
        r[k] = m & a[k];
        k++;
    }
#else
    int blk;

    /* Two passes of eight digits each, lowest index first. */
    for (blk = 0; blk < 2; blk++) {
        const sp_digit* src = a + (blk * 8);
        sp_digit* dst = r + (blk * 8);

        dst[0] = m & src[0];
        dst[1] = m & src[1];
        dst[2] = m & src[2];
        dst[3] = m & src[3];
        dst[4] = m & src[4];
        dst[5] = m & src[5];
        dst[6] = m & src[6];
        dst[7] = m & src[7];
    }
#endif
}

/* Multiply a and b into r. (r = a * b)
 *
 * One Karatsuba step: both operands are split into 16-digit halves and the
 * product is assembled from three 16-digit multiplications (low halves, high
 * halves, and the sums of the halves).
 *
 * r  A single precision integer. Digits r[0] .. r[63] are written.
 * a  A single precision integer. 32 digits are read.
 * b  A single precision integer. 32 digits are read.
 */
SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit mid[32];
    sp_digit sumA[16];
    sp_digit sumB[16];
    sp_digit* lo = r;
    sp_digit* hi = r + 32;
    sp_digit carryA;
    sp_digit carryB;
    sp_digit top;

    /* Sum the halves of each operand; the carry-outs are tracked apart from
     * the 16-digit sums. */
    carryA = sp_2048_add_16(sumA, a, a + 16);
    carryB = sp_2048_add_16(sumB, b, b + 16);
    top = carryA & carryB;

    /* Three half-size products. The high product is stored before the low
     * one, in the same order as the inputs are consumed. */
    sp_2048_mul_16(hi, a + 16, b + 16);
    sp_2048_mul_16(lo, a, b);
    sp_2048_mul_16(mid, sumA, sumB);

    /* Middle term: remove the low and high products, then add back the
     * cross terms that the dropped carry-outs of the half sums stand for.
     * Every carry/borrow result is accumulated into top. */
    top += sp_2048_sub_in_place_32(mid, lo);
    top += sp_2048_sub_in_place_32(mid, hi);
    sp_2048_mask_16(sumA, sumA, 0 - carryB);
    top += sp_2048_add_16(mid + 16, mid + 16, sumA);
    sp_2048_mask_16(sumB, sumB, 0 - carryA);
    top += sp_2048_add_16(mid + 16, mid + 16, sumB);

    /* Fold the middle term in at a 16-digit offset and push whatever
     * overflowed into the top 16 digits of the result. */
    top += sp_2048_add_32(r + 16, r + 16, mid);
    (void)sp_2048_add_word_16(r + 48, r + 48, top);
}

/* Add the single digit b to a into r. (r = a + b)
 *
 * Fully unrolled over 32 digits: b is added to the lowest digit and the
 * resulting carry is rippled through the remaining digits by adding zero.
 *
 * r  A single precision integer. Receives the 32-digit sum.
 * a  A single precision integer. 32 digits are read.
 * b  A single precision digit.
 *
 * Returns the carry out of the most significant digit (0 or 1).
 */
SP_NOINLINE static sp_digit sp_2048_add_word_32(sp_digit* r, const sp_digit* a,
        sp_digit b)
{
    /* NOTE(review): the #else spellings (neither clang nor Keil) omit the
     * 's' suffix; presumably that assembler syntax makes these Thumb
     * instructions update the flags anyway - confirm against the toolchain.
     */
    __asm__ __volatile__ (
        /* r5 stays zero for the whole routine: adc with it adds only the
         * carry. */
        "movs	r5, #0\n\t"
        /* Digits 0-1: b goes into digit 0 and starts the carry chain. */
        "ldm	%[a]!, {r3, r4}\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, %[b]\n\t"
#else
        "add	r3, r3, %[b]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 2-3. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 4-5. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 6-7. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 8-9. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 10-11. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 12-13. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 14-15. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 16-17. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 18-19. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 20-21. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 22-23. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 24-25. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 26-27. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 28-29. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Digits 30-31. */
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "adcs	r3, r5\n\t"
#else
        "adc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Turn the final carry into a value: r = 0, then r = r + r + carry.
         * The register that held the result pointer is reused for this. */
        "movs	%[r], #0\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	%[r], %[r], %[r]\n\t"
#elif defined(__clang__)
        "adcs	%[r], %[r]\n\t"
#else
        "adc	%[r], %[r]\n\t"
#endif
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r3", "r4", "r5", "cc"
    );
    /* r no longer holds a pointer here; it carries the 0/1 carry value. */
    return (word32)(size_t)r;
}

/* Subtract b from a, storing the difference back into a. (a -= b)
 *
 * Works on 64 words, two at a time from the lowest address upward. The borrow
 * is kept in the processor flags between one pair of words and the next.
 *
 * a  A single precision integer. Overwritten with the result.
 * b  A single precision integer.
 * returns 0 when the last subtract did not borrow and all ones when it did.
 */
/* Instruction spellings differ per assembler: Keil wants three operands and
 * the flag-setting suffix, clang wants the suffix only and the default
 * assembler takes the plain mnemonic.
 */
#ifdef WOLFSSL_KEIL
    #define SP_2048_SIP64_SUB(d, s) "subs	" d ", " d ", " s "\n\t"
    #define SP_2048_SIP64_SBC(d, s) "sbcs	" d ", " d ", " s "\n\t"
#elif defined(__clang__)
    #define SP_2048_SIP64_SUB(d, s) "subs	" d ", " d ", " s "\n\t"
    #define SP_2048_SIP64_SBC(d, s) "sbcs	" d ", " s "\n\t"
#else
    #define SP_2048_SIP64_SUB(d, s) "sub	" d ", " d ", " s "\n\t"
    #define SP_2048_SIP64_SBC(d, s) "sbc	" d ", " s "\n\t"
#endif
/* Fetch two words of b (advancing b) and the matching two words of a. */
#define SP_2048_SIP64_LOAD \
        "ldm	%[b]!, {r4, r5}\n\t" \
        "ldr	r2, [%[a]]\n\t" \
        "ldr	r3, [%[a], #4]\n\t"
/* Write the two result words back to a and advance a. */
#define SP_2048_SIP64_STORE \
        "stm	%[a]!, {r2, r3}\n\t"
/* One two-word step that continues the borrow chain. */
#define SP_2048_SIP64_NEXT \
        SP_2048_SIP64_LOAD \
        SP_2048_SIP64_SBC("r2", "r4") \
        SP_2048_SIP64_SBC("r3", "r5") \
        SP_2048_SIP64_STORE
#define SP_2048_SIP64_NEXT_4 \
        SP_2048_SIP64_NEXT SP_2048_SIP64_NEXT \
        SP_2048_SIP64_NEXT SP_2048_SIP64_NEXT
#define SP_2048_SIP64_NEXT_8 \
        SP_2048_SIP64_NEXT_4 SP_2048_SIP64_NEXT_4
SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
        const sp_digit* b)
{
    __asm__ __volatile__ (
        /* Words 0-1: a plain subtract starts the borrow chain. */
        SP_2048_SIP64_LOAD
        SP_2048_SIP64_SUB("r2", "r4")
        SP_2048_SIP64_SBC("r3", "r5")
        SP_2048_SIP64_STORE
        /* Words 2-63: 31 further steps (8 + 8 + 8 + 4 + 3). */
        SP_2048_SIP64_NEXT_8
        SP_2048_SIP64_NEXT_8
        SP_2048_SIP64_NEXT_8
        SP_2048_SIP64_NEXT_4
        SP_2048_SIP64_NEXT
        SP_2048_SIP64_NEXT
        SP_2048_SIP64_NEXT
        /* a - a - borrow: turns the final borrow into 0 or all ones. */
        SP_2048_SIP64_SBC("%[a]", "%[a]")
        : [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r2", "r3", "r4", "r5", "cc"
    );
    return (word32)(size_t)a;
}
#undef SP_2048_SIP64_NEXT_8
#undef SP_2048_SIP64_NEXT_4
#undef SP_2048_SIP64_NEXT
#undef SP_2048_SIP64_STORE
#undef SP_2048_SIP64_LOAD
#undef SP_2048_SIP64_SBC
#undef SP_2048_SIP64_SUB

/* Add a and b and store the sum in r. (r = a + b)
 *
 * Works on 64 words, two at a time from the lowest address upward. The carry
 * is kept in the processor flags between one pair of words and the next.
 *
 * r  A single precision integer. Receives the sum.
 * a  A single precision integer.
 * b  A single precision integer.
 * returns the carry out of the last addition: 0 or 1.
 */
/* Instruction spellings differ per assembler: Keil wants three operands and
 * the flag-setting suffix, clang wants the suffix only and the default
 * assembler takes the plain mnemonic.
 */
#ifdef WOLFSSL_KEIL
    #define SP_2048_ADD64_ADD(d, s) "adds	" d ", " d ", " s "\n\t"
    #define SP_2048_ADD64_ADC(d, s) "adcs	" d ", " d ", " s "\n\t"
#elif defined(__clang__)
    #define SP_2048_ADD64_ADD(d, s) "adds	" d ", " d ", " s "\n\t"
    #define SP_2048_ADD64_ADC(d, s) "adcs	" d ", " s "\n\t"
#else
    #define SP_2048_ADD64_ADD(d, s) "add	" d ", " d ", " s "\n\t"
    #define SP_2048_ADD64_ADC(d, s) "adc	" d ", " s "\n\t"
#endif
/* Fetch two words from b and two from a, advancing both pointers. */
#define SP_2048_ADD64_LOAD \
        "ldm	%[b]!, {r5, r6}\n\t" \
        "ldm	%[a]!, {r3, r4}\n\t"
/* Write the two sum words to r and advance r. */
#define SP_2048_ADD64_STORE \
        "stm	%[r]!, {r3, r4}\n\t"
/* One two-word step that continues the carry chain. */
#define SP_2048_ADD64_NEXT \
        SP_2048_ADD64_LOAD \
        SP_2048_ADD64_ADC("r3", "r5") \
        SP_2048_ADD64_ADC("r4", "r6") \
        SP_2048_ADD64_STORE
#define SP_2048_ADD64_NEXT_4 \
        SP_2048_ADD64_NEXT SP_2048_ADD64_NEXT \
        SP_2048_ADD64_NEXT SP_2048_ADD64_NEXT
#define SP_2048_ADD64_NEXT_8 \
        SP_2048_ADD64_NEXT_4 SP_2048_ADD64_NEXT_4
SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    __asm__ __volatile__ (
        /* Words 0-1: a plain add starts the carry chain. */
        SP_2048_ADD64_LOAD
        SP_2048_ADD64_ADD("r3", "r5")
        SP_2048_ADD64_ADC("r4", "r6")
        SP_2048_ADD64_STORE
        /* Words 2-63: 31 further steps (8 + 8 + 8 + 4 + 3). */
        SP_2048_ADD64_NEXT_8
        SP_2048_ADD64_NEXT_8
        SP_2048_ADD64_NEXT_8
        SP_2048_ADD64_NEXT_4
        SP_2048_ADD64_NEXT
        SP_2048_ADD64_NEXT
        SP_2048_ADD64_NEXT
        /* 0 + 0 + carry: turns the final carry into the return value. */
        "movs	%[r], #0\n\t"
        SP_2048_ADD64_ADC("%[r]", "%[r]")
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "cc"
    );
    return (word32)(size_t)r;
}
#undef SP_2048_ADD64_NEXT_8
#undef SP_2048_ADD64_NEXT_4
#undef SP_2048_ADD64_NEXT
#undef SP_2048_ADD64_STORE
#undef SP_2048_ADD64_LOAD
#undef SP_2048_ADD64_ADC
#undef SP_2048_ADD64_ADD

/* Mask every word of a with m and write the result to r. (r[i] = a[i] & m)
 *
 * Exactly 32 words are processed.
 *
 * r  A single precision integer. Receives the masked words.
 * a  A single precision integer.
 * m  Mask to AND against each digit.
 */
static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
{
    int i = 0;

#ifdef WOLFSSL_SP_SMALL
    /* Compact build: one word per pass. */
    while (i < 32) {
        r[i] = a[i] & m;
        i++;
    }
#else
    /* Default build: eight words per pass, four passes. */
    while (i < 32) {
        sp_digit* dst = r + i;
        const sp_digit* src = a + i;

        dst[0] = src[0] & m;
        dst[1] = src[1] & m;
        dst[2] = src[2] & m;
        dst[3] = src[3] & m;
        dst[4] = src[4] & m;
        dst[5] = src[5] & m;
        dst[6] = src[6] & m;
        dst[7] = src[7] & m;
        i += 8;
    }
#endif
}

/* Multiply a by b and store the product in r. (r = a * b)
 *
 * The 64-word operands are split into 32-word halves and combined from three
 * 32-word multiplications: low * low, high * high and (sum of halves of a) *
 * (sum of halves of b). r is written up to word 127.
 *
 * r  A single precision integer. Receives the product.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit  mid[64];
    sp_digit  sumA[32];
    sp_digit  sumB[32];
    sp_digit* low  = r;
    sp_digit* high = r + 64;
    sp_digit  carryA;
    sp_digit  carryB;
    sp_digit  top;

    /* Add the two halves of each operand, keeping the value each add
     * returns. */
    carryA = sp_2048_add_32(sumA, a, a + 32);
    carryB = sp_2048_add_32(sumB, b, b + 32);
    top = carryA & carryB;

    /* Three half-size products: upper halves, lower halves, then the sums. */
    sp_2048_mul_32(high, a + 32, b + 32);
    sp_2048_mul_32(low, a, b);
    sp_2048_mul_32(mid, sumA, sumB);

    /* Remove the outer products from the middle term. */
    top += sp_2048_sub_in_place_64(mid, low);
    top += sp_2048_sub_in_place_64(mid, high);

    /* Account for the values returned by the half sums: each sum is added
     * into the top of the middle term only when the other sum's returned
     * value was non-zero (the mask is 0 minus that value). */
    sp_2048_mask_32(sumA, sumA, 0 - carryB);
    top += sp_2048_add_32(mid + 32, mid + 32, sumA);
    sp_2048_mask_32(sumB, sumB, 0 - carryA);
    top += sp_2048_add_32(mid + 32, mid + 32, sumB);

    /* Fold the middle term into the result at word 32 and pass what is left
     * over into the top 32 words. */
    top += sp_2048_add_64(r + 32, r + 32, mid);
    (void)sp_2048_add_word_32(r + 96, r + 96, top);
}

#ifndef WOLFSSL_SP_LARGE_CODE
/* Square a and put result in r. (r = a * a)
 *
 * r  A single precision integer.
 * a  A single precision integer.
 */
SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a)
{
    __asm__ __volatile__ (
        "movs	r3, #0\n\t"
        "movs	r4, #0\n\t"
        "movs	r5, #0\n\t"
        "mov	r8, r3\n\t"
        "mov	r11, %[r]\n\t"
        "movs	r6, #0x40\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "negs	r6, r6\n\t"
#else
        "neg	r6, r6\n\t"
#endif
        "add	sp, sp, r6\n\t"
        "mov	r10, sp\n\t"
        "mov	r9, %[a]\n\t"
        "\n"
    "L_sp_2048_sqr_8_words_%=:\n\t"
        "movs	%[r], #0\n\t"
        "movs	r6, #28\n\t"
        "mov	%[a], r8\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[a], %[a], r6\n\t"
#else
        "sub	%[a], %[a], r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r6\n\t"
#elif defined(__clang__)
        "sbcs	r6, r6\n\t"
#else
        "sbc	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "mvns	r6, r6\n\t"
#else
        "mvn	r6, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "ands	%[a], %[a], r6\n\t"
#elif defined(__clang__)
        "ands	%[a], r6\n\t"
#else
        "and	%[a], r6\n\t"
#endif
        "mov	r2, r8\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r2, r2, %[a]\n\t"
#else
        "sub	r2, r2, %[a]\n\t"
#endif
        "add	%[a], %[a], r9\n\t"
        "add	r2, r2, r9\n\t"
        "\n"
    "L_sp_2048_sqr_8_mul_%=:\n\t"
        "cmp	r2, %[a]\n\t"
        "beq	L_sp_2048_sqr_8_sqr_%=\n\t"
        "# Multiply * 2: Start\n\t"
        "ldrh	r6, [%[a]]\n\t"
        "ldrh	r7, [r2]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "ldr	r7, [r2]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r7, #16\n\t"
#else
        "lsr	r7, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "ldr	r6, [%[a]]\n\t"
        "ldr	r7, [r2]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, #16\n\t"
#else
        "lsr	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r7, #16\n\t"
#else
        "lsr	r7, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "ldrh	r7, [r2]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "# Multiply * 2: Done\n\t"
        "bal	L_sp_2048_sqr_8_done_sqr_%=\n\t"
        "\n"
    "L_sp_2048_sqr_8_sqr_%=:\n\t"
        "mov	r12, r2\n\t"
        "ldr	r2, [%[a]]\n\t"
        "# Square: Start\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r2, #16\n\t"
#else
        "lsr	r7, r2, #16\n\t"
#endif
        "uxth	r6, r2\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r6, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r6\n\t"
#else
        "mul	r6, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r7, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r7\n\t"
#else
        "mul	r7, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r2, #16\n\t"
#else
        "lsr	r7, r2, #16\n\t"
#endif
        "uxth	r6, r2\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #15\n\t"
#else
        "lsr	r7, r6, #15\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #17\n\t"
#else
        "lsl	r6, r6, #17\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "# Square: Done\n\t"
        "mov	r2, r12\n\t"
        "\n"
    "L_sp_2048_sqr_8_done_sqr_%=:\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[a], %[a], #4\n\t"
#else
        "add	%[a], %[a], #4\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r2, r2, #4\n\t"
#else
        "sub	r2, r2, #4\n\t"
#endif
        "movs	r6, #32\n\t"
        "add	r6, r6, r9\n\t"
        "cmp	%[a], r6\n\t"
        "beq	L_sp_2048_sqr_8_done_mul_%=\n\t"
        "cmp	%[a], r2\n\t"
        "bgt	L_sp_2048_sqr_8_done_mul_%=\n\t"
        "mov	r7, r8\n\t"
        "add	r7, r7, r9\n\t"
        "cmp	%[a], r7\n\t"
        "ble	L_sp_2048_sqr_8_mul_%=\n\t"
        "\n"
    "L_sp_2048_sqr_8_done_mul_%=:\n\t"
        "mov	%[r], r10\n\t"
        "mov	r7, r8\n\t"
        "str	r3, [%[r], r7]\n\t"
        "movs	r3, r4\n\t"
        "movs	r4, r5\n\t"
        "movs	r5, #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r7, r7, #4\n\t"
#else
        "add	r7, r7, #4\n\t"
#endif
        "mov	r8, r7\n\t"
        "movs	r6, #56\n\t"
        "cmp	r7, r6\n\t"
        "ble	L_sp_2048_sqr_8_words_%=\n\t"
        "mov	%[a], r9\n\t"
        "str	r3, [%[r], r7]\n\t"
        "mov	%[r], r11\n\t"
        "mov	%[a], r10\n\t"
        "movs	r3, #60\n\t"
        "\n"
    "L_sp_2048_sqr_8_store_%=:\n\t"
        "ldr	r6, [%[a], r3]\n\t"
        "str	r6, [%[r], r3]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r3, r3, #4\n\t"
#else
        "sub	r3, r3, #4\n\t"
#endif
        "bge	L_sp_2048_sqr_8_store_%=\n\t"
        "movs	r6, #0x40\n\t"
        "add	sp, sp, r6\n\t"
        : [r] "+l" (r), [a] "+l" (a)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc"
    );
}

#else
/* Square a and put result in r. (r = a * a)
 *
 * r  A single precision integer that receives the result of the squaring.
 * a  A single precision integer to be squared.
 */
SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a)
{
    __asm__ __volatile__ (
        "sub	sp, sp, #32\n\t"
        "mov	r8, %[r]\n\t"
        "mov	r9, %[a]\n\t"
        "movs	%[r], #0\n\t"
        "ldm	%[a]!, {r2, r3, r4, r5}\n\t"
        "mov	r10, r2\n\t"
        "mov	r11, r3\n\t"
        "mov	r12, r4\n\t"
        "mov	lr, r5\n\t"
        "mov	%[a], r9\n\t"
        "#  A[0] * A[0]\n\t"
        "movs	r4, #0\n\t"
        "mov	r7, r10\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
        "uxth	r5, r7\n\t"
        "movs	r2, r5\n\t"
        "movs	r3, r6\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r2, r2, r2\n\t"
#elif defined(__clang__)
        "muls	r2, r2\n\t"
#else
        "mul	r2, r2\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r3, r3, r3\n\t"
#elif defined(__clang__)
        "muls	r3, r3\n\t"
#else
        "mul	r3, r3\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #15\n\t"
#else
        "lsr	r6, r5, #15\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #17\n\t"
#else
        "lsl	r5, r5, #17\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
        "str	r2, [sp]\n\t"
        "#  A[1] * A[0]\n\t"
        "movs	r2, #0\n\t"
        "mov	%[a], r11\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "str	r3, [sp, #4]\n\t"
        "#  A[2] * A[0]\n\t"
        "movs	r3, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[a], r12\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[1] * A[1]\n\t"
        "mov	r7, r11\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
        "uxth	r5, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r5, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r5\n\t"
#else
        "mul	r5, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r6, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r6\n\t"
#else
        "mul	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
        "uxth	r5, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #15\n\t"
#else
        "lsr	r6, r5, #15\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #17\n\t"
#else
        "lsl	r5, r5, #17\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "str	r4, [sp, #8]\n\t"
        "#  A[2] * A[1]\n\t"
        "movs	r4, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[a], r12\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[3] * A[0]\n\t"
        "mov	%[a], r9\n\t"
        "mov	r7, r10\n\t"
        "mov	%[a], lr\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "str	r2, [sp, #12]\n\t"
        "#  A[4] * A[0]\n\t"
        "movs	r2, #0\n\t"
        "mov	%[a], r9\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "#  A[3] * A[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	r7, r11\n\t"
        "mov	%[a], lr\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "#  A[2] * A[2]\n\t"
        "mov	r7, r12\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
        "uxth	r5, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r5, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r5\n\t"
#else
        "mul	r5, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r6, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r6\n\t"
#else
        "mul	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
        "uxth	r5, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #15\n\t"
#else
        "lsr	r6, r5, #15\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #17\n\t"
#else
        "lsl	r5, r5, #17\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "str	r3, [sp, #16]\n\t"
        "#  A[3] * A[2]\n\t"
        "movs	r3, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[a], lr\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[4] * A[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	r7, r11\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[5] * A[0]\n\t"
        "mov	%[a], r9\n\t"
        "mov	r7, r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "str	r4, [sp, #20]\n\t"
        "#  A[6] * A[0]\n\t"
        "movs	r4, #0\n\t"
        "mov	%[a], r9\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[5] * A[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	r7, r11\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[4] * A[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	r7, r12\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[3] * A[3]\n\t"
        "mov	r7, lr\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
        "uxth	r5, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r5, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r5\n\t"
#else
        "mul	r5, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r6, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r6\n\t"
#else
        "mul	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
        "uxth	r5, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #15\n\t"
#else
        "lsr	r6, r5, #15\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #17\n\t"
#else
        "lsl	r5, r5, #17\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "str	r2, [sp, #24]\n\t"
        "#  A[4] * A[3]\n\t"
        "movs	r2, #0\n\t"
        "mov	%[a], r9\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "#  A[5] * A[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	r7, r12\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "#  A[6] * A[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	r7, r11\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "#  A[7] * A[0]\n\t"
        "mov	%[a], r9\n\t"
        "mov	r7, r10\n\t"
        "ldr	%[a], [%[a], #28]\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "str	r3, [sp, #28]\n\t"
        "#  A[7] * A[1]\n\t"
        "movs	r3, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	r7, r11\n\t"
        "ldr	%[a], [%[a], #28]\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[6] * A[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	r7, r12\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[5] * A[3]\n\t"
        "mov	%[a], r9\n\t"
        "mov	r7, lr\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[4] * A[4]\n\t"
        "mov	%[a], r9\n\t"
        "ldr	r7, [%[a], #16]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
        "uxth	r5, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r5, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r5\n\t"
#else
        "mul	r5, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r6, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r6\n\t"
#else
        "mul	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
        "uxth	r5, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #15\n\t"
#else
        "lsr	r6, r5, #15\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #17\n\t"
#else
        "lsl	r5, r5, #17\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "mov	%[r], r8\n\t"
        "str	r4, [%[r], #32]\n\t"
        "movs	%[r], #0\n\t"
        "movs	%[a], #16\n\t"
        "add	%[a], %[a], r9\n\t"
        "ldm	%[a]!, {r5, r6}\n\t"
        "mov	r10, r5\n\t"
        "mov	r11, r6\n\t"
        "ldm	%[a]!, {r5, r6}\n\t"
        "mov	r12, r5\n\t"
        "mov	lr, r6\n\t"
        "mov	%[a], r9\n\t"
        "#  A[5] * A[4]\n\t"
        "movs	r4, #0\n\t"
        "mov	%[a], r11\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[6] * A[3]\n\t"
        "mov	%[a], r9\n\t"
        "ldr	r7, [%[a], #12]\n\t"
        "mov	%[a], r12\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[7] * A[2]\n\t"
        "mov	%[a], r9\n\t"
        "ldr	r7, [%[a], #8]\n\t"
        "mov	%[a], lr\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "mov	%[r], r8\n\t"
        "str	r2, [%[r], #36]\n\t"
        "movs	%[r], #0\n\t"
        "#  A[7] * A[3]\n\t"
        "movs	r2, #0\n\t"
        "mov	%[a], r9\n\t"
        "ldr	r7, [%[a], #12]\n\t"
        "mov	%[a], lr\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "#  A[6] * A[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	r7, r10\n\t"
        "mov	%[a], r12\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "#  A[5] * A[5]\n\t"
        "mov	r7, r11\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
        "uxth	r5, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r5, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r5\n\t"
#else
        "mul	r5, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r6, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r6\n\t"
#else
        "mul	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
        "uxth	r5, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #15\n\t"
#else
        "lsr	r6, r5, #15\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #17\n\t"
#else
        "lsl	r5, r5, #17\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "mov	%[r], r8\n\t"
        "str	r3, [%[r], #40]\n\t"
        "movs	%[r], #0\n\t"
        "#  A[6] * A[5]\n\t"
        "movs	r3, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[a], r12\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[7] * A[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	r7, r10\n\t"
        "mov	%[a], lr\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "mov	%[r], r8\n\t"
        "str	r4, [%[r], #44]\n\t"
        "movs	%[r], #0\n\t"
        "#  A[7] * A[5]\n\t"
        "movs	r4, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	r7, r11\n\t"
        "mov	%[a], lr\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r6\n\t"
#else
        "add	r2, r2, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[6] * A[6]\n\t"
        "mov	r7, r12\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
        "uxth	r5, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r5, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r5\n\t"
#else
        "mul	r5, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r6, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r6\n\t"
#else
        "mul	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
        "uxth	r5, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #15\n\t"
#else
        "lsr	r6, r5, #15\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #17\n\t"
#else
        "lsl	r5, r5, #17\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r5\n\t"
#else
        "add	r2, r2, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "mov	%[r], r8\n\t"
        "str	r2, [%[r], #48]\n\t"
        "movs	%[r], #0\n\t"
        "#  A[7] * A[6]\n\t"
        "movs	r2, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[a], lr\n\t"
        "uxth	r5, %[a]\n\t"
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[a], #16\n\t"
#else
        "lsr	r5, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "uxth	r6, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r2, %[r]\n\t"
#else
        "adc	r2, %[r]\n\t"
#endif
        "mov	%[r], r8\n\t"
        "str	r3, [%[r], #52]\n\t"
        "movs	%[r], #0\n\t"
        "#  A[7] * A[7]\n\t"
        "mov	%[a], r9\n\t"
        "mov	r7, lr\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
        "uxth	r5, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r5, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r5\n\t"
#else
        "mul	r5, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r6, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r6\n\t"
#else
        "mul	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r7, #16\n\t"
#else
        "lsr	r6, r7, #16\n\t"
#endif
        "uxth	r5, r7\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #15\n\t"
#else
        "lsr	r6, r5, #15\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #17\n\t"
#else
        "lsl	r5, r5, #17\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r2, r2, r6\n\t"
#elif defined(__clang__)
        "adcs	r2, r6\n\t"
#else
        "adc	r2, r6\n\t"
#endif
        "mov	%[r], r8\n\t"
        "str	r4, [%[r], #56]\n\t"
        "str	r2, [%[r], #60]\n\t"
        "pop	{r2, r3, r4, r5}\n\t"
        "stm	%[r]!, {r2, r3, r4, r5}\n\t"
        "pop	{r2, r3, r4, r5}\n\t"
        "stm	%[r]!, {r2, r3, r4, r5}\n\t"
        : [r] "+l" (r), [a] "+l" (a)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

#endif /* !WOLFSSL_SP_LARGE_CODE */
/* Sub b from a into r. (r = a - b)
 *
 * Works on 8 words, two at a time: each step loads a pair from b and a pair
 * from a, subtracts them and stores the pair to r. The first word uses a
 * plain subtract and every later word a subtract-with-carry, so the borrow is
 * carried through the whole number in the processor's carry flag.
 *
 * r  A single precision integer. Receives the 8 word difference.
 * a  A single precision integer. The value subtracted from (8 words read).
 * b  A single precision integer. The value to subtract (8 words read).
 * returns the contents of the r register after the final subtract-with-carry
 *         of r from itself: 0 when there was no borrow out of the top word
 *         and a word of all ones when there was.
 */
SP_NOINLINE static sp_digit sp_2048_sub_8(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    /* Each instruction is written in up to three spellings selected by
     * WOLFSSL_KEIL / __clang__ / neither. NOTE(review): these are presumably
     * the same flag-setting Thumb instruction in each assembler's syntax -
     * confirm against the toolchain documentation. */
    __asm__ __volatile__ (
        /* Words 0-1: the plain subtract starts the borrow chain. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r3, r3, r5\n\t"
#else
        "sub	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Words 2-3: continue the borrow chain. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Words 4-5. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Words 6-7. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* r = r - r - borrow: turns the final borrow into 0 or all ones. */
#ifdef WOLFSSL_KEIL
        "sbcs	%[r], %[r], %[r]\n\t"
#elif defined(__clang__)
        "sbcs	%[r], %[r]\n\t"
#else
        "sbc	%[r], %[r]\n\t"
#endif
        /* All three pointers are read-write operands: the ldm/stm forms with
         * '!' advance them, and r is finally overwritten with the result. */
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "cc"
    );
    /* r no longer holds an address here - it carries the borrow mask. */
    return (word32)(size_t)r;
}

/* Square a and put result in r. (r = a * a)
 *
 * The 16 word input is treated as a low half (a[0..7]) and a high half
 * (a[8..15]). Three 8 word squares are computed - the high half, the low half
 * and the absolute difference of the halves - and then folded together with
 * in-place subtractions, so no separate low * high product is needed.
 *
 * r  A single precision integer. Receives the square.
 * a  A single precision integer. The value to square.
 */
SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
{
    sp_digit  mid[16];
    sp_digit* lo = r;
    sp_digit* hi = r + 16;
    sp_digit* diff = mid;
    sp_digit* zeros = mid + 8;
    sp_digit* minuend;
    sp_digit* subtrahend;
    sp_digit  neg;
    sp_digit  carry;

    /* Upper half of the scratch buffer serves as an all-zero operand. */
    XMEMSET(zeros, 0, 8 * sizeof(sp_digit));

    /* diff = low - high. The return value is used as a select mask: all ones
     * when the subtraction borrowed, zero otherwise. */
    neg = sp_2048_sub_8(diff, a, a + 8);

    /* Pick the operands with masks rather than a branch: (0 - diff) when the
     * first subtraction borrowed, (diff - 0) when it did not. Either way diff
     * ends up holding |low - high|. */
    minuend = (sp_digit*)(((sp_digit)diff & (~neg)) |
                          ((sp_digit)zeros &   neg ));
    subtrahend = (sp_digit*)(((sp_digit)diff &   neg ) |
                             ((sp_digit)zeros & (~neg)));
    (void)sp_2048_sub_8(diff, minuend, subtrahend);

    /* Order is kept as high, low, difference.
     * NOTE(review): presumably this lets r share storage with a - confirm
     * with callers before reordering. */
    sp_2048_sqr_8(hi, a + 8);
    sp_2048_sqr_8(lo, a);
    sp_2048_sqr_8(mid, diff);

    /* mid = diff^2 - high^2 - low^2, then the middle of r has mid taken away.
     * NOTE(review): presumably mid is minus twice low * high at this point,
     * so the last subtraction adds the cross term - confirm against
     * sp_2048_sub_in_place_16. The values returned by the three calls are
     * accumulated and folded into the top 8 words of the result. */
    carry  = (sp_digit)0 - sp_2048_sub_in_place_16(mid, hi);
    carry -= sp_2048_sub_in_place_16(mid, lo);
    carry += sp_2048_sub_in_place_16(r + 8, mid);
    sp_2048_add_word_8(r + 24, r + 24, carry);
}

/* Sub b from a into r. (r = a - b)
 *
 * Works on 16 words, two at a time: each step loads a pair from b and a pair
 * from a, subtracts them and stores the pair to r. The first word uses a
 * plain subtract and every later word a subtract-with-carry, so the borrow is
 * carried through the whole number in the processor's carry flag.
 *
 * r  A single precision integer. Receives the 16 word difference.
 * a  A single precision integer. The value subtracted from (16 words read).
 * b  A single precision integer. The value to subtract (16 words read).
 * returns the contents of the r register after the final subtract-with-carry
 *         of r from itself: 0 when there was no borrow out of the top word
 *         and a word of all ones when there was.
 */
SP_NOINLINE static sp_digit sp_2048_sub_16(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    /* Each instruction is written in up to three spellings selected by
     * WOLFSSL_KEIL / __clang__ / neither. NOTE(review): these are presumably
     * the same flag-setting Thumb instruction in each assembler's syntax -
     * confirm against the toolchain documentation. */
    __asm__ __volatile__ (
        /* Words 0-1: the plain subtract starts the borrow chain. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r3, r3, r5\n\t"
#else
        "sub	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Words 2-3: continue the borrow chain. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Words 4-5. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Words 6-7. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Words 8-9. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Words 10-11. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Words 12-13. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* Words 14-15. */
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        /* r = r - r - borrow: turns the final borrow into 0 or all ones. */
#ifdef WOLFSSL_KEIL
        "sbcs	%[r], %[r], %[r]\n\t"
#elif defined(__clang__)
        "sbcs	%[r], %[r]\n\t"
#else
        "sbc	%[r], %[r]\n\t"
#endif
        /* All three pointers are read-write operands: the ldm/stm forms with
         * '!' advance them, and r is finally overwritten with the result. */
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "cc"
    );
    /* r no longer holds an address here - it carries the borrow mask. */
    return (word32)(size_t)r;
}

/* Square a and put result in r. (r = a * a)
 *
 * The 32 word input is treated as a low half (a[0..15]) and a high half
 * (a[16..31]). Three 16 word squares are computed - the high half, the low
 * half and the absolute difference of the halves - and then folded together
 * with in-place subtractions, so no separate low * high product is needed.
 *
 * r  A single precision integer. Receives the square.
 * a  A single precision integer. The value to square.
 */
SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
{
    sp_digit  mid[32];
    sp_digit* lo = r;
    sp_digit* hi = r + 32;
    sp_digit* diff = mid;
    sp_digit* zeros = mid + 16;
    sp_digit* minuend;
    sp_digit* subtrahend;
    sp_digit  neg;
    sp_digit  carry;

    /* Upper half of the scratch buffer serves as an all-zero operand. */
    XMEMSET(zeros, 0, 16 * sizeof(sp_digit));

    /* diff = low - high. The return value is used as a select mask: all ones
     * when the subtraction borrowed, zero otherwise. */
    neg = sp_2048_sub_16(diff, a, a + 16);

    /* Pick the operands with masks rather than a branch: (0 - diff) when the
     * first subtraction borrowed, (diff - 0) when it did not. Either way diff
     * ends up holding |low - high|. */
    minuend = (sp_digit*)(((sp_digit)diff & (~neg)) |
                          ((sp_digit)zeros &   neg ));
    subtrahend = (sp_digit*)(((sp_digit)diff &   neg ) |
                             ((sp_digit)zeros & (~neg)));
    (void)sp_2048_sub_16(diff, minuend, subtrahend);

    /* Order is kept as high, low, difference.
     * NOTE(review): presumably this lets r share storage with a - confirm
     * with callers before reordering. */
    sp_2048_sqr_16(hi, a + 16);
    sp_2048_sqr_16(lo, a);
    sp_2048_sqr_16(mid, diff);

    /* mid = diff^2 - high^2 - low^2, then the middle of r has mid taken away.
     * NOTE(review): presumably mid is minus twice low * high at this point,
     * so the last subtraction adds the cross term - confirm against
     * sp_2048_sub_in_place_32. The values returned by the three calls are
     * accumulated and folded into the top 16 words of the result. */
    carry  = (sp_digit)0 - sp_2048_sub_in_place_32(mid, hi);
    carry -= sp_2048_sub_in_place_32(mid, lo);
    carry += sp_2048_sub_in_place_32(r + 16, mid);
    sp_2048_add_word_16(r + 48, r + 48, carry);
}

/* Sub b from a into r. (r = a - b)
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    __asm__ __volatile__ (
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r3, r3, r5\n\t"
#else
        "sub	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	%[r], %[r], %[r]\n\t"
#elif defined(__clang__)
        "sbcs	%[r], %[r]\n\t"
#else
        "sbc	%[r], %[r]\n\t"
#endif
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "cc"
    );
    return (word32)(size_t)r;
}

/* Square a and put result in r. (r = a * a)
 *
 * The input is handled as a low half (a[0..31]) and a high half (a[32..63]).
 * Three 32-word squarings are performed: one of each half and one of the
 * difference of the halves. The results are then combined in place in r.
 *
 * r  A single precision integer. Written as 128 words.
 * a  A single precision integer. Read as 64 words.
 */
SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
{
    sp_digit mid[64];
    sp_digit* lo_sq = r;
    sp_digit* hi_sq = r + 64;
    sp_digit* diff = mid;
    sp_digit* zeros = mid + 32;
    sp_digit* minuend;
    sp_digit* subtrahend;
    sp_digit neg;
    sp_digit carry = 0;

    /* Upper half of the scratch buffer serves as a 32-word zero operand. */
    XMEMSET(zeros, 0, sizeof(sp_digit) * 32);

    /* diff = low half - high half. */
    neg = sp_2048_sub_32(diff, a, &a[32]);
    /* Pick the operands of the second subtraction with masks rather than a
     * branch. Presumably sp_2048_sub_32 returns all ones on borrow and 0
     * otherwise, which makes this (0 - diff) or (diff - 0) - confirm against
     * its definition.
     */
    minuend = (sp_digit*)(((sp_digit)diff & (~neg)) |
                          ((sp_digit)zeros & neg));
    subtrahend = (sp_digit*)(((sp_digit)diff & neg) |
                             ((sp_digit)zeros & (~neg)));
    (void)sp_2048_sub_32(diff, minuend, subtrahend);

    /* High half is squared first, straight into the top 64 words of r. */
    sp_2048_sqr_32(hi_sq, &a[32]);
    sp_2048_sqr_32(lo_sq, a);
    /* Square of the difference overwrites the whole scratch buffer. */
    sp_2048_sqr_32(mid, diff);

    /* mid -= high^2, mid -= low^2, then r[32..95] -= mid. The values returned
     * by the subtractions are folded into one word that is added in at r[96].
     */
    carry -= sp_2048_sub_in_place_64(mid, hi_sq);
    carry -= sp_2048_sub_in_place_64(mid, lo_sq);
    carry += sp_2048_sub_in_place_64(r + 32, mid);
    sp_2048_add_word_32(r + 96, r + 96, carry);
}

#endif /* !WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Add b to a into r. (r = a + b)
 *
 * Adds 0x100 bytes (64 32-bit words) of a and b, one word per loop
 * iteration. The carry is kept in a register between iterations and put
 * back into the carry flag before each add-with-carry.
 *
 * r  A single precision integer. Receives the 64-word sum.
 * a  A single precision integer.
 * b  A single precision integer.
 *
 * Returns the carry out of the last word added: 0 or 1.
 */
SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    __asm__ __volatile__ (
        /* r6 = a + 0x100 (loop end), r7 = 0 - 1, r3 = saved carry (0). */
        "movs	r6, %[a]\n\t"
        "movs	r7, #0\n\t"
        "movs	r3, #0\n\t"
        "movs	r4, #0xff\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, #1\n\t"
#else
        "add	r4, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r7, r7, #1\n\t"
#else
        "sub	r7, r7, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r6, r6, r4\n\t"
#else
        "add	r6, r6, r4\n\t"
#endif
        "\n"
    "L_sp_2048_add_64_word_%=:\n\t"
        /* r3 + 0xffffffff carries out exactly when the saved carry r3 is 1,
         * which restores the carry flag for the adc below. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
        "ldr	r4, [%[a]]\n\t"
        "ldr	r5, [%[b]]\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "str	r4, [%[r]]\n\t"
        /* r3 = 0 + 0 + carry: save the carry flag as 0 or 1. */
        "movs	r3, #0\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r3\n\t"
#elif defined(__clang__)
        "adcs	r3, r3\n\t"
#else
        "adc	r3, r3\n\t"
#endif
        /* Step all three pointers to the next word. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[a], %[a], #4\n\t"
#else
        "add	%[a], %[a], #4\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[b], %[b], #4\n\t"
#else
        "add	%[b], %[b], #4\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[r], %[r], #4\n\t"
#else
        "add	%[r], %[r], #4\n\t"
#endif
        "cmp	%[a], r6\n\t"
        "bne	L_sp_2048_add_64_word_%=\n\t"
        /* The r operand is reused to carry the final carry out of the asm. */
        "movs	%[r], r3\n\t"
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "cc"
    );
    return (word32)(size_t)r;
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Sub b from a into a. (a -= b)
 *
 * Subtracts 0x100 bytes (64 32-bit words) of b from a, two words per loop
 * iteration. The borrow is kept in a register between iterations and put
 * back into the carry flag before each subtract-with-carry pair.
 *
 * a  A single precision integer. Updated in place.
 * b  A single precision integer.
 *
 * Returns 0 when the last subtraction did not borrow and 0xffffffff when it
 * did.
 */
SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
        const sp_digit* b)
{
    __asm__ __volatile__ (
        /* r7 = a + 0x100 (loop end), r2 = saved borrow (0). */
        "movs	r7, %[a]\n\t"
        "movs	r2, #0\n\t"
        "movs	r5, #0xff\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, #1\n\t"
#else
        "add	r5, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r7, r7, r5\n\t"
#else
        "add	r7, r7, r5\n\t"
#endif
        "\n"
    "L_sp_2048_sub_in_place_64_words_%=:\n\t"
        /* 0 - r2 is done only for its flags: it borrows exactly when the
         * saved borrow r2 is non-zero. r5 is reloaded below. */
        "movs	r5, #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r5, r5, r2\n\t"
#else
        "sub	r5, r5, r2\n\t"
#endif
        "ldr	r3, [%[a]]\n\t"
        "ldr	r4, [%[a], #4]\n\t"
        "ldr	r5, [%[b]]\n\t"
        "ldr	r6, [%[b], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "str	r3, [%[a]]\n\t"
        "str	r4, [%[a], #4]\n\t"
        /* r2 - r2 - borrow: save the borrow as 0 or 0xffffffff. */
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r2\n\t"
#elif defined(__clang__)
        "sbcs	r2, r2\n\t"
#else
        "sbc	r2, r2\n\t"
#endif
        /* Step both pointers past the two words just processed. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[a], %[a], #8\n\t"
#else
        "add	%[a], %[a], #8\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[b], %[b], #8\n\t"
#else
        "add	%[b], %[b], #8\n\t"
#endif
        "cmp	%[a], r7\n\t"
        "bne	L_sp_2048_sub_in_place_64_words_%=\n\t"
        /* The a operand is reused to carry the borrow out of the asm. */
        "movs	%[a], r2\n\t"
        : [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc"
    );
    return (word32)(size_t)a;
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Multiply a and b into r. (r = a * b)
 *
 * Builds the product one result word at a time: for the result word at byte
 * offset k, every pair of input words whose byte offsets sum to k is
 * multiplied and accumulated into the three registers r5:r4:r3. Each
 * 32x32-bit product is assembled from four 16x16-bit multiplies. The result
 * is written to a local buffer and copied to r only after the assembly has
 * finished.
 *
 * r  A single precision integer. Receives the 128-word product.
 * a  A single precision integer. Read as 64 words.
 * b  A single precision integer. Read as 64 words.
 */
SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit t[64 * 2];
    sp_digit* tmp = t;
    __asm__ __volatile__ (
        /* r3, r4 = low words of the accumulator, r8 = result byte offset k,
         * r11 = tmp, r9 = a, r10 = b, r12 = a + 0x100 (end of a). */
        "movs	r3, #0\n\t"
        "movs	r4, #0\n\t"
        "mov	r8, r3\n\t"
        "mov	r11, %[tmp]\n\t"
        "mov	r9, %[a]\n\t"
        "mov	r10, %[b]\n\t"
        "movs	r6, #0xff\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r6, r6, #1\n\t"
#else
        "add	r6, r6, #1\n\t"
#endif
        "add	r6, r6, r9\n\t"
        "mov	r12, r6\n\t"
        "\n"
    "L_sp_2048_mul_64_words_%=:\n\t"
        /* tmp doubles as a zero register for the adc instructions below;
         * r5 = top word of the accumulator. */
        "movs	%[tmp], #0\n\t"
        "movs	r5, #0\n\t"
        /* a = (k >= 0xfc) ? k - 0xfc : 0, selected with a mask built from
         * the borrow of the subtraction instead of a branch. */
        "movs	r6, #0xfc\n\t"
        "mov	%[a], r8\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[a], %[a], r6\n\t"
#else
        "sub	%[a], %[a], r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r6\n\t"
#elif defined(__clang__)
        "sbcs	r6, r6\n\t"
#else
        "sbc	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "mvns	r6, r6\n\t"
#else
        "mvn	r6, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "ands	%[a], %[a], r6\n\t"
#elif defined(__clang__)
        "ands	%[a], r6\n\t"
#else
        "and	%[a], r6\n\t"
#endif
        /* b = k - a, then turn both byte offsets into pointers. */
        "mov	%[b], r8\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[b], %[b], %[a]\n\t"
#else
        "sub	%[b], %[b], %[a]\n\t"
#endif
        "add	%[a], %[a], r9\n\t"
        "add	%[b], %[b], r10\n\t"
        "\n"
    "L_sp_2048_mul_64_mul_%=:\n\t"
        "# Multiply Start\n\t"
        /* Halfword at [a] times halfword at [b] (the low 16 bits of each
         * word on a little-endian target): added at bit 0. */
        "ldrh	r6, [%[a]]\n\t"
        "ldrh	r7, [%[b]]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[tmp]\n\t"
#else
        "adc	r4, %[tmp]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[tmp]\n\t"
#else
        "adc	r5, %[tmp]\n\t"
#endif
        /* Same halfword of a times the top 16 bits of the word at [b]:
         * split at bit 16 and added across r3 and r4. */
        "ldr	r7, [%[b]]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r7, #16\n\t"
#else
        "lsr	r7, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[tmp]\n\t"
#else
        "adc	r5, %[tmp]\n\t"
#endif
        /* Top 16 bits of both words: added starting at r4. */
        "ldr	r6, [%[a]]\n\t"
        "ldr	r7, [%[b]]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, #16\n\t"
#else
        "lsr	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r7, #16\n\t"
#else
        "lsr	r7, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[tmp]\n\t"
#else
        "adc	r5, %[tmp]\n\t"
#endif
        /* Top 16 bits of the word at [a] times the halfword at [b]:
         * split at bit 16 and added across r3 and r4. */
        "ldrh	r7, [%[b]]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[tmp]\n\t"
#else
        "adc	r5, %[tmp]\n\t"
#endif
        "# Multiply Done\n\t"
        /* Next pair for this result word: a moves up one word, b moves
         * down one word. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[a], %[a], #4\n\t"
#else
        "add	%[a], %[a], #4\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[b], %[b], #4\n\t"
#else
        "sub	%[b], %[b], #4\n\t"
#endif
        /* Finished with this result word once a reaches the end of the
         * input (r12) or passes r9 + k.
         * NOTE(review): ble is a signed comparison applied to addresses. */
        "cmp	%[a], r12\n\t"
        "beq	L_sp_2048_mul_64_done_mul_%=\n\t"
        "mov	r6, r8\n\t"
        "add	r6, r6, r9\n\t"
        "cmp	%[a], r6\n\t"
        "ble	L_sp_2048_mul_64_mul_%=\n\t"
        "\n"
    "L_sp_2048_mul_64_done_mul_%=:\n\t"
        /* Store the low accumulator word at byte offset k of the buffer
         * and shift the accumulator down one word. */
        "mov	%[tmp], r11\n\t"
        "mov	r7, r8\n\t"
        "str	r3, [%[tmp], r7]\n\t"
        "movs	r3, r4\n\t"
        "movs	r4, r5\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r7, r7, #4\n\t"
#else
        "add	r7, r7, #4\n\t"
#endif
        "mov	r8, r7\n\t"
        /* Loop while k <= 0xff + 0xf9 = 0x1f8. */
        "movs	r6, #0xff\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r6, r6, #0xf9\n\t"
#else
        "add	r6, r6, #0xf9\n\t"
#endif
        "cmp	r7, r6\n\t"
        "ble	L_sp_2048_mul_64_words_%=\n\t"
        /* Final result word (byte offset 0x1fc) is what remains in r3. */
        "str	r3, [%[tmp], r7]\n\t"
        /* Put the original a and b back in their operand registers. */
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        : [a] "+l" (a), [b] "+l" (b), [tmp] "+l" (tmp)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc"
    );

    /* Copy the completed product from the local buffer to the output. */
    XMEMCPY(r, t, sizeof(t));
}

/* Square a and put result in r. (r = a * a)
 *
 * Builds the square one result word at a time: for the result word at byte
 * offset k, pairs of input words whose byte offsets sum to k are multiplied
 * and accumulated into the three registers r5:r4:r3. A pair of two different
 * words is added twice; a word paired with itself is squared and added once.
 * The result is built in 0x200 bytes reserved by moving the stack pointer
 * inside the assembly, then copied to r before the stack pointer is put
 * back.
 *
 * r  A single precision integer. Receives the 128-word square.
 * a  A single precision integer. Read as 64 words.
 */
SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
{
    __asm__ __volatile__ (
        /* r5:r4:r3 = accumulator, r8 = result byte offset k, r11 = r. */
        "movs	r3, #0\n\t"
        "movs	r4, #0\n\t"
        "movs	r5, #0\n\t"
        "mov	r8, r3\n\t"
        "mov	r11, %[r]\n\t"
        /* sp -= 0x200: reserve the temporary result buffer; r10 = buffer. */
        "movs	r6, #2\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #8\n\t"
#else
        "lsl	r6, r6, #8\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "negs	r6, r6\n\t"
#else
        "neg	r6, r6\n\t"
#endif
        "add	sp, sp, r6\n\t"
        "mov	r10, sp\n\t"
        "mov	r9, %[a]\n\t"
        "\n"
    "L_sp_2048_sqr_64_words_%=:\n\t"
        /* The r operand register doubles as a zero register for the adc
         * instructions below. */
        "movs	%[r], #0\n\t"
        /* a = (k >= 0xfc) ? k - 0xfc : 0, selected with a mask built from
         * the borrow of the subtraction instead of a branch. */
        "movs	r6, #0xfc\n\t"
        "mov	%[a], r8\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[a], %[a], r6\n\t"
#else
        "sub	%[a], %[a], r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r6\n\t"
#elif defined(__clang__)
        "sbcs	r6, r6\n\t"
#else
        "sbc	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "mvns	r6, r6\n\t"
#else
        "mvn	r6, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "ands	%[a], %[a], r6\n\t"
#elif defined(__clang__)
        "ands	%[a], r6\n\t"
#else
        "and	%[a], r6\n\t"
#endif
        /* r2 = k - a, then turn both byte offsets into pointers into the
         * input (both are based on r9). */
        "mov	r2, r8\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r2, r2, %[a]\n\t"
#else
        "sub	r2, r2, %[a]\n\t"
#endif
        "add	%[a], %[a], r9\n\t"
        "add	r2, r2, r9\n\t"
        "\n"
    "L_sp_2048_sqr_64_mul_%=:\n\t"
        /* Both pointers on the same word: take the squaring path. */
        "cmp	r2, %[a]\n\t"
        "beq	L_sp_2048_sqr_64_sqr_%=\n\t"
        "# Multiply * 2: Start\n\t"
        /* Halfword at [a] times halfword at [r2] (the low 16 bits of each
         * word on a little-endian target): added twice at bit 0. */
        "ldrh	r6, [%[a]]\n\t"
        "ldrh	r7, [r2]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        /* Same halfword of [a] times the top 16 bits of the word at [r2]:
         * split at bit 16 and added twice across r3 and r4. */
        "ldr	r7, [r2]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r7, #16\n\t"
#else
        "lsr	r7, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        /* Top 16 bits of both words: added twice starting at r4. */
        "ldr	r6, [%[a]]\n\t"
        "ldr	r7, [r2]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, #16\n\t"
#else
        "lsr	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r7, #16\n\t"
#else
        "lsr	r7, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        /* Top 16 bits of the word at [a] times the halfword at [r2]:
         * split at bit 16 and added twice across r3 and r4. */
        "ldrh	r7, [r2]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "# Multiply * 2: Done\n\t"
        "bal	L_sp_2048_sqr_64_done_sqr_%=\n\t"
        "\n"
    "L_sp_2048_sqr_64_sqr_%=:\n\t"
        /* r2 is needed as a scratch register: park the pointer in r12 and
         * load the word to be squared. */
        "mov	r12, r2\n\t"
        "ldr	r2, [%[a]]\n\t"
        "# Square: Start\n\t"
        /* r6 = (low 16 bits)^2 added into r3, r7 = (high 16 bits)^2 added
         * into r4. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r2, #16\n\t"
#else
        "lsr	r7, r2, #16\n\t"
#endif
        "uxth	r6, r2\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r6, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r6\n\t"
#else
        "mul	r6, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r7, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r7\n\t"
#else
        "mul	r7, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        /* high * low is needed twice at bit 16, i.e. shifted left by 17:
         * the low part is (product << 17), the high part (product >> 15). */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r2, #16\n\t"
#else
        "lsr	r7, r2, #16\n\t"
#endif
        "uxth	r6, r2\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #15\n\t"
#else
        "lsr	r7, r6, #15\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #17\n\t"
#else
        "lsl	r6, r6, #17\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "# Square: Done\n\t"
        /* Restore the descending pointer. */
        "mov	r2, r12\n\t"
        "\n"
    "L_sp_2048_sqr_64_done_sqr_%=:\n\t"
        /* Next pair for this result word: a moves up one word, r2 moves
         * down one word. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[a], %[a], #4\n\t"
#else
        "add	%[a], %[a], #4\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r2, r2, #4\n\t"
#else
        "sub	r2, r2, #4\n\t"
#endif
        /* Finished with this result word once a reaches the end of the
         * input (r9 + 0x100), passes the descending pointer r2, or passes
         * r9 + k.
         * NOTE(review): bgt/ble are signed comparisons applied to
         * addresses. */
        "movs	r6, #0xff\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r6, r6, #1\n\t"
#else
        "add	r6, r6, #1\n\t"
#endif
        "add	r6, r6, r9\n\t"
        "cmp	%[a], r6\n\t"
        "beq	L_sp_2048_sqr_64_done_mul_%=\n\t"
        "cmp	%[a], r2\n\t"
        "bgt	L_sp_2048_sqr_64_done_mul_%=\n\t"
        "mov	r7, r8\n\t"
        "add	r7, r7, r9\n\t"
        "cmp	%[a], r7\n\t"
        "ble	L_sp_2048_sqr_64_mul_%=\n\t"
        "\n"
    "L_sp_2048_sqr_64_done_mul_%=:\n\t"
        /* Store the low accumulator word at byte offset k of the stack
         * buffer and shift the accumulator down one word. */
        "mov	%[r], r10\n\t"
        "mov	r7, r8\n\t"
        "str	r3, [%[r], r7]\n\t"
        "movs	r3, r4\n\t"
        "movs	r4, r5\n\t"
        "movs	r5, #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r7, r7, #4\n\t"
#else
        "add	r7, r7, #4\n\t"
#endif
        "mov	r8, r7\n\t"
        /* Loop while k <= 0xff + 0xf9 = 0x1f8. */
        "movs	r6, #0xff\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r6, r6, #0xf9\n\t"
#else
        "add	r6, r6, #0xf9\n\t"
#endif
        "cmp	r7, r6\n\t"
        "ble	L_sp_2048_sqr_64_words_%=\n\t"
        "mov	%[a], r9\n\t"
        /* Final result word (byte offset 0x1fc) is what remains in r3. */
        "str	r3, [%[r], r7]\n\t"
        /* Copy the stack buffer (now in the a register) to the caller's r,
         * from byte offset 0xff + 0xfd = 0x1fc down to 0. */
        "mov	%[r], r11\n\t"
        "mov	%[a], r10\n\t"
        "movs	r3, #0xff\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, #0xfd\n\t"
#else
        "add	r3, r3, #0xfd\n\t"
#endif
        "\n"
    "L_sp_2048_sqr_64_store_%=:\n\t"
        "ldr	r6, [%[a], r3]\n\t"
        "str	r6, [%[r], r3]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r3, r3, #4\n\t"
#else
        "sub	r3, r3, #4\n\t"
#endif
        "bge	L_sp_2048_sqr_64_store_%=\n\t"
        /* sp += 0x200: release the temporary buffer. */
        "movs	r6, #2\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #8\n\t"
#else
        "lsl	r6, r6, #8\n\t"
#endif
        "add	sp, sp, r6\n\t"
        : [r] "+l" (r), [a] "+l" (a)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc"
    );
}

#endif /* WOLFSSL_SP_SMALL */
#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
#ifdef WOLFSSL_SP_SMALL
/* Mask every word of a with m and write the result to r.
 *
 * Exactly 32 words are processed: r[i] = a[i] & m for i = 0..31.
 *
 * r  A single precision integer. Receives the masked words.
 * a  A single precision integer.
 * m  Mask to AND against each digit.
 */
static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
{
    int idx = 0;

    while (idx < 32) {
        r[idx] = a[idx] & m;
        idx++;
    }
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Add b to a into r. (r = a + b)
 *
 * Adds 0x80 bytes (32 32-bit words) of a and b, one word per loop
 * iteration. The carry is kept in a register between iterations and put
 * back into the carry flag before each add-with-carry.
 *
 * r  A single precision integer. Receives the 32-word sum.
 * a  A single precision integer.
 * b  A single precision integer.
 *
 * Returns the carry out of the last word added: 0 or 1.
 */
SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    __asm__ __volatile__ (
        /* r6 = a + 0x80 (loop end), r7 = 0 - 1, r3 = saved carry (0). */
        "movs	r6, %[a]\n\t"
        "movs	r7, #0\n\t"
        "movs	r3, #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r6, r6, #0x80\n\t"
#else
        "add	r6, r6, #0x80\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r7, r7, #1\n\t"
#else
        "sub	r7, r7, #1\n\t"
#endif
        "\n"
    "L_sp_2048_add_32_word_%=:\n\t"
        /* r3 + 0xffffffff carries out exactly when the saved carry r3 is 1,
         * which restores the carry flag for the adc below. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
        "ldr	r4, [%[a]]\n\t"
        "ldr	r5, [%[b]]\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "adcs	r4, r5\n\t"
#else
        "adc	r4, r5\n\t"
#endif
        "str	r4, [%[r]]\n\t"
        /* r3 = 0 + 0 + carry: save the carry flag as 0 or 1. */
        "movs	r3, #0\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r3\n\t"
#elif defined(__clang__)
        "adcs	r3, r3\n\t"
#else
        "adc	r3, r3\n\t"
#endif
        /* Step all three pointers to the next word. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[a], %[a], #4\n\t"
#else
        "add	%[a], %[a], #4\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[b], %[b], #4\n\t"
#else
        "add	%[b], %[b], #4\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[r], %[r], #4\n\t"
#else
        "add	%[r], %[r], #4\n\t"
#endif
        "cmp	%[a], r6\n\t"
        "bne	L_sp_2048_add_32_word_%=\n\t"
        /* The r operand is reused to carry the final carry out of the asm. */
        "movs	%[r], r3\n\t"
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "cc"
    );
    return (word32)(size_t)r;
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Sub b from a into a. (a -= b)
 *
 * Subtracts 0x80 bytes (32 32-bit words) of b from a, two words per loop
 * iteration. The borrow is kept in a register between iterations and put
 * back into the carry flag before each subtract-with-carry pair.
 *
 * a  A single precision integer. Updated in place.
 * b  A single precision integer.
 *
 * Returns 0 when the last subtraction did not borrow and 0xffffffff when it
 * did.
 */
SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
        const sp_digit* b)
{
    __asm__ __volatile__ (
        /* r7 = a + 0x80 (loop end), r2 = saved borrow (0). */
        "movs	r7, %[a]\n\t"
        "movs	r2, #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r7, r7, #0x80\n\t"
#else
        "add	r7, r7, #0x80\n\t"
#endif
        "\n"
    "L_sp_2048_sub_in_place_32_words_%=:\n\t"
        /* 0 - r2 is done only for its flags: it borrows exactly when the
         * saved borrow r2 is non-zero. r5 is reloaded below. */
        "movs	r5, #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r5, r5, r2\n\t"
#else
        "sub	r5, r5, r2\n\t"
#endif
        "ldr	r3, [%[a]]\n\t"
        "ldr	r4, [%[a], #4]\n\t"
        "ldr	r5, [%[b]]\n\t"
        "ldr	r6, [%[b], #4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "str	r3, [%[a]]\n\t"
        "str	r4, [%[a], #4]\n\t"
        /* r2 - r2 - borrow: save the borrow as 0 or 0xffffffff. */
#ifdef WOLFSSL_KEIL
        "sbcs	r2, r2, r2\n\t"
#elif defined(__clang__)
        "sbcs	r2, r2\n\t"
#else
        "sbc	r2, r2\n\t"
#endif
        /* Step both pointers past the two words just processed. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[a], %[a], #8\n\t"
#else
        "add	%[a], %[a], #8\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[b], %[b], #8\n\t"
#else
        "add	%[b], %[b], #8\n\t"
#endif
        "cmp	%[a], r7\n\t"
        "bne	L_sp_2048_sub_in_place_32_words_%=\n\t"
        /* The a operand is reused to carry the borrow out of the asm. */
        "movs	%[a], r2\n\t"
        : [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc"
    );
    return (word32)(size_t)a;
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Multiply a and b into r. (r = a * b)
 *
 * Builds the product one result word at a time: for the result word at byte
 * offset k, every pair of input words whose byte offsets sum to k is
 * multiplied and accumulated into the three registers r5:r4:r3. Each
 * 32x32-bit product is assembled from four 16x16-bit multiplies. The result
 * is written to a local buffer and copied to r only after the assembly has
 * finished.
 *
 * r  A single precision integer. Receives the 64-word product.
 * a  A single precision integer. Read as 32 words.
 * b  A single precision integer. Read as 32 words.
 */
SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit t[32 * 2];
    sp_digit* tmp = t;
    __asm__ __volatile__ (
        /* r3, r4 = low words of the accumulator, r8 = result byte offset k,
         * r11 = tmp, r9 = a, r10 = b, r12 = a + 0x80 (end of a). */
        "movs	r3, #0\n\t"
        "movs	r4, #0\n\t"
        "mov	r8, r3\n\t"
        "mov	r11, %[tmp]\n\t"
        "mov	r9, %[a]\n\t"
        "mov	r10, %[b]\n\t"
        "movs	r6, #0x80\n\t"
        "add	r6, r6, r9\n\t"
        "mov	r12, r6\n\t"
        "\n"
    "L_sp_2048_mul_32_words_%=:\n\t"
        /* tmp doubles as a zero register for the adc instructions below;
         * r5 = top word of the accumulator. */
        "movs	%[tmp], #0\n\t"
        "movs	r5, #0\n\t"
        /* a = (k >= 0x7c) ? k - 0x7c : 0, selected with a mask built from
         * the borrow of the subtraction instead of a branch. */
        "movs	r6, #0x7c\n\t"
        "mov	%[a], r8\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[a], %[a], r6\n\t"
#else
        "sub	%[a], %[a], r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r6\n\t"
#elif defined(__clang__)
        "sbcs	r6, r6\n\t"
#else
        "sbc	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "mvns	r6, r6\n\t"
#else
        "mvn	r6, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "ands	%[a], %[a], r6\n\t"
#elif defined(__clang__)
        "ands	%[a], r6\n\t"
#else
        "and	%[a], r6\n\t"
#endif
        /* b = k - a, then turn both byte offsets into pointers. */
        "mov	%[b], r8\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[b], %[b], %[a]\n\t"
#else
        "sub	%[b], %[b], %[a]\n\t"
#endif
        "add	%[a], %[a], r9\n\t"
        "add	%[b], %[b], r10\n\t"
        "\n"
    "L_sp_2048_mul_32_mul_%=:\n\t"
        "# Multiply Start\n\t"
        /* Halfword at [a] times halfword at [b] (the low 16 bits of each
         * word on a little-endian target): added at bit 0. */
        "ldrh	r6, [%[a]]\n\t"
        "ldrh	r7, [%[b]]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[tmp]\n\t"
#else
        "adc	r4, %[tmp]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[tmp]\n\t"
#else
        "adc	r5, %[tmp]\n\t"
#endif
        /* Same halfword of a times the top 16 bits of the word at [b]:
         * split at bit 16 and added across r3 and r4. */
        "ldr	r7, [%[b]]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r7, #16\n\t"
#else
        "lsr	r7, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[tmp]\n\t"
#else
        "adc	r5, %[tmp]\n\t"
#endif
        /* Top 16 bits of both words: added starting at r4. */
        "ldr	r6, [%[a]]\n\t"
        "ldr	r7, [%[b]]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, #16\n\t"
#else
        "lsr	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r7, #16\n\t"
#else
        "lsr	r7, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[tmp]\n\t"
#else
        "adc	r5, %[tmp]\n\t"
#endif
        /* Top 16 bits of the word at [a] times the halfword at [b]:
         * split at bit 16 and added across r3 and r4. */
        "ldrh	r7, [%[b]]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[tmp]\n\t"
#else
        "adc	r5, %[tmp]\n\t"
#endif
        "# Multiply Done\n\t"
        /* Next pair for this result word: a moves up one word, b moves
         * down one word. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[a], %[a], #4\n\t"
#else
        "add	%[a], %[a], #4\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[b], %[b], #4\n\t"
#else
        "sub	%[b], %[b], #4\n\t"
#endif
        /* Finished with this result word once a reaches the end of the
         * input (r12) or passes r9 + k.
         * NOTE(review): ble is a signed comparison applied to addresses. */
        "cmp	%[a], r12\n\t"
        "beq	L_sp_2048_mul_32_done_mul_%=\n\t"
        "mov	r6, r8\n\t"
        "add	r6, r6, r9\n\t"
        "cmp	%[a], r6\n\t"
        "ble	L_sp_2048_mul_32_mul_%=\n\t"
        "\n"
    "L_sp_2048_mul_32_done_mul_%=:\n\t"
        /* Store the low accumulator word at byte offset k of the buffer
         * and shift the accumulator down one word. */
        "mov	%[tmp], r11\n\t"
        "mov	r7, r8\n\t"
        "str	r3, [%[tmp], r7]\n\t"
        "movs	r3, r4\n\t"
        "movs	r4, r5\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r7, r7, #4\n\t"
#else
        "add	r7, r7, #4\n\t"
#endif
        "mov	r8, r7\n\t"
        /* Loop while k <= 0xf8. */
        "movs	r6, #0xf8\n\t"
        "cmp	r7, r6\n\t"
        "ble	L_sp_2048_mul_32_words_%=\n\t"
        /* Final result word (byte offset 0xfc) is what remains in r3. */
        "str	r3, [%[tmp], r7]\n\t"
        /* Put the original a and b back in their operand registers. */
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        : [a] "+l" (a), [b] "+l" (b), [tmp] "+l" (tmp)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc"
    );

    /* Copy the completed product from the local buffer to the output. */
    XMEMCPY(r, t, sizeof(t));
}

/* Square a and put result in r. (r = a * a)
 *
 * The assembly reads 0x80 bytes (32 words) of a and writes 0x100 bytes
 * (64 words) to r. The result is produced one output word at a time: for each
 * output word the products a[i] * a[j] belonging to that column are summed
 * into a three-register accumulator (r3 = low, r4 = middle, r5 = high).
 * Products with i != j are added twice; the i == j product is added once.
 * Each 32x32 product is built from 16x16-bit multiplies.
 *
 * The result words are first stored into a 0x100 byte area reserved by
 * lowering sp inside the assembly and are copied to r only at the end
 * (presumably so that r may alias a - confirm against callers).
 *
 * r  A single precision integer.
 * a  A single precision integer.
 */
SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
{
    __asm__ __volatile__ (
        /* Clear the accumulator r3:r4:r5 and the output byte offset (r8). */
        "movs	r3, #0\n\t"
        "movs	r4, #0\n\t"
        "movs	r5, #0\n\t"
        "mov	r8, r3\n\t"
        /* Keep the caller's result pointer in r11. */
        "mov	r11, %[r]\n\t"
        /* r6 = -0x100: lower sp by 0x100 bytes for the temporary result. */
        "movs	r6, #0xff\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r6, r6, #1\n\t"
#else
        "add	r6, r6, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "negs	r6, r6\n\t"
#else
        "neg	r6, r6\n\t"
#endif
        "add	sp, sp, r6\n\t"
        /* r10 = temporary result buffer, r9 = base address of a. */
        "mov	r10, sp\n\t"
        "mov	r9, %[a]\n\t"
        "\n"
    "L_sp_2048_sqr_32_words_%=:\n\t"
        /* %[r] is used as a zero register while adding carries. */
        "movs	%[r], #0\n\t"
        /* %[a] = max(r8 - 0x7c, 0): the subtract borrows when r8 < 0x7c,
         * sbc/mvn turn that borrow into a mask of 0, otherwise all ones. */
        "movs	r6, #0x7c\n\t"
        "mov	%[a], r8\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[a], %[a], r6\n\t"
#else
        "sub	%[a], %[a], r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r6\n\t"
#elif defined(__clang__)
        "sbcs	r6, r6\n\t"
#else
        "sbc	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "mvns	r6, r6\n\t"
#else
        "mvn	r6, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "ands	%[a], %[a], r6\n\t"
#elif defined(__clang__)
        "ands	%[a], r6\n\t"
#else
        "and	%[a], r6\n\t"
#endif
        /* r2 = r8 - %[a]: offset of the other operand word of the pair. */
        "mov	r2, r8\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r2, r2, %[a]\n\t"
#else
        "sub	r2, r2, %[a]\n\t"
#endif
        /* Turn both byte offsets into pointers into a. */
        "add	%[a], %[a], r9\n\t"
        "add	r2, r2, r9\n\t"
        "\n"
    "L_sp_2048_sqr_32_mul_%=:\n\t"
        /* Both pointers on the same word: take the squaring path. */
        "cmp	r2, %[a]\n\t"
        "beq	L_sp_2048_sqr_32_sqr_%=\n\t"
        "# Multiply * 2: Start\n\t"
        /* Halfword loads at the word address; the code uses them as the low
         * 16 bits of each word (assumes a little-endian target - confirm). */
        "ldrh	r6, [%[a]]\n\t"
        "ldrh	r7, [r2]\n\t"
        /* low * low, added to the accumulator twice. */
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        /* low half of [%[a]] * high half of [r2]: the product is split at
         * bit 16 across r3/r4 and added twice. */
        "ldr	r7, [r2]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r7, #16\n\t"
#else
        "lsr	r7, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        /* high * high: added twice into r4 with the carry going to r5. */
        "ldr	r6, [%[a]]\n\t"
        "ldr	r7, [r2]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, #16\n\t"
#else
        "lsr	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r7, #16\n\t"
#else
        "lsr	r7, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        /* high half of [%[a]] (still in r6) * low half of [r2]: split at
         * bit 16 and added twice. */
        "ldrh	r7, [r2]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "# Multiply * 2: Done\n\t"
        "bal	L_sp_2048_sqr_32_done_sqr_%=\n\t"
        "\n"
    "L_sp_2048_sqr_32_sqr_%=:\n\t"
        /* Save r2 in r12; r2 holds the word being squared until restored. */
        "mov	r12, r2\n\t"
        "ldr	r2, [%[a]]\n\t"
        "# Square: Start\n\t"
        /* r7 = high half, r6 = low half of the word. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r2, #16\n\t"
#else
        "lsr	r7, r2, #16\n\t"
#endif
        "uxth	r6, r2\n\t"
        /* low^2 is added to r3 and high^2 (plus carry) to r4. */
#ifdef WOLFSSL_KEIL
        "muls	r6, r6, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r6\n\t"
#else
        "mul	r6, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r7, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r7\n\t"
#else
        "mul	r7, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        /* Cross term low * high, doubled: it is split with shifts of 15 and
         * 17 bits (one more than the 16 used on the multiply path). */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r2, #16\n\t"
#else
        "lsr	r7, r2, #16\n\t"
#endif
        "uxth	r6, r2\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #15\n\t"
#else
        "lsr	r7, r6, #15\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #17\n\t"
#else
        "lsl	r6, r6, #17\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "# Square: Done\n\t"
        /* Restore the second operand pointer. */
        "mov	r2, r12\n\t"
        "\n"
    "L_sp_2048_sqr_32_done_sqr_%=:\n\t"
        /* Step the pair: %[a] moves up one word, r2 moves down one word. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[a], %[a], #4\n\t"
#else
        "add	%[a], %[a], #4\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r2, r2, #4\n\t"
#else
        "sub	r2, r2, #4\n\t"
#endif
        /* Column finished when %[a] reaches a + 0x80 (end of the input)... */
        "movs	r6, #0x80\n\t"
        "add	r6, r6, r9\n\t"
        "cmp	%[a], r6\n\t"
        "beq	L_sp_2048_sqr_32_done_mul_%=\n\t"
        /* ...or when %[a] has passed r2 (each pair is visited once). */
        "cmp	%[a], r2\n\t"
        "bgt	L_sp_2048_sqr_32_done_mul_%=\n\t"
        /* Otherwise continue while %[a] <= a + r8. */
        "mov	r7, r8\n\t"
        "add	r7, r7, r9\n\t"
        "cmp	%[a], r7\n\t"
        "ble	L_sp_2048_sqr_32_mul_%=\n\t"
        "\n"
    "L_sp_2048_sqr_32_done_mul_%=:\n\t"
        /* Store the finished word into the temporary buffer at offset r8. */
        "mov	%[r], r10\n\t"
        "mov	r7, r8\n\t"
        "str	r3, [%[r], r7]\n\t"
        /* Shift the accumulator down one word for the next column. */
        "movs	r3, r4\n\t"
        "movs	r4, r5\n\t"
        "movs	r5, #0\n\t"
        /* Advance the output offset; loop while it is <= 0xf8. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r7, r7, #4\n\t"
#else
        "add	r7, r7, #4\n\t"
#endif
        "mov	r8, r7\n\t"
        "movs	r6, #0xf8\n\t"
        "cmp	r7, r6\n\t"
        "ble	L_sp_2048_sqr_32_words_%=\n\t"
        "mov	%[a], r9\n\t"
        /* r7 is 0xfc here: store the final (top) word. */
        "str	r3, [%[r], r7]\n\t"
        /* Copy the temporary buffer (%[a] = r10) to the caller's r (r11),
         * from byte offset 0xfc down to 0. */
        "mov	%[r], r11\n\t"
        "mov	%[a], r10\n\t"
        "movs	r3, #0xfc\n\t"
        "\n"
    "L_sp_2048_sqr_32_store_%=:\n\t"
        "ldr	r6, [%[a], r3]\n\t"
        "str	r6, [%[r], r3]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r3, r3, #4\n\t"
#else
        "sub	r3, r3, #4\n\t"
#endif
        "bge	L_sp_2048_sqr_32_store_%=\n\t"
        /* Give back the 0x100 bytes taken from the stack at entry. */
        "movs	r6, #0xff\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r6, r6, #1\n\t"
#else
        "add	r6, r6, #1\n\t"
#endif
        "add	sp, sp, r6\n\t"
        : [r] "+l" (r), [a] "+l" (a)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc"
    );
}

#endif /* WOLFSSL_SP_SMALL */
#endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */

/* Work out the bottom digit of -1/a mod 2^n for Montgomery reduction.
 *
 * Only the lowest digit of a is used. An inverse that is correct to 4 bits is
 * formed directly and then refined with three Newton steps, each one doubling
 * the number of correct bits (4 -> 8 -> 16 -> 32). The negated inverse is
 * written to rho.
 *
 * a    A single precision number.
 * rho  Bottom word of inverse.
 */
static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
{
    const sp_digit m0 = a[0];
    /* here inv*a==1 mod 2**4 */
    sp_digit inv = m0 + (((m0 + 2) & 4) << 1);
    int step;

    /* after each pass: inv*a==1 mod 2**8, 2**16, 2**32 */
    for (step = 0; step < 3; step++) {
        inv *= 2 - m0 * inv;
    }

    /* rho = -1/m mod b */
    *rho = (sp_digit)0 - inv;
}

/* Mul a by digit b into r. (r = a * b)
 *
 * The assembly reads 0x100 bytes (64 words) of a and stores one result word
 * per input word plus one final carry word, i.e. 65 words are written to r.
 * Each 32x32 product is built from four 16x16-bit multiplies and summed into
 * the accumulator r3 (low), r4 (middle), r5 (high).
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision digit.
 */
SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a,
        sp_digit b)
{
    __asm__ __volatile__ (
        /* r6 = a + 0x100: address one past the last input word. */
        "movs	r6, #0xff\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r6, r6, #1\n\t"
#else
        "add	r6, r6, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r6, r6, %[a]\n\t"
#else
        "add	r6, r6, %[a]\n\t"
#endif
        /* r8 = current output pointer, r9 = end of input. */
        "mov	r8, %[r]\n\t"
        "mov	r9, r6\n\t"
        /* Clear the low and middle accumulator words. */
        "movs	r3, #0\n\t"
        "movs	r4, #0\n\t"
        "\n"
    "L_sp_2048_mul_d_64_%=:\n\t"
        /* %[r] is used as a zero register inside the loop; the real output
         * pointer is kept in r8. */
        "movs	%[r], #0\n\t"
        "movs	r5, #0\n\t"
        "# A[] * B\n\t"
        /* Halfword load at the word address, used as the low 16 bits of the
         * word (assumes a little-endian target - confirm). */
        "ldrh	r6, [%[a]]\n\t"
        "uxth	r7, %[b]\n\t"
        /* low(a) * low(b) */
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        /* low(a) * high(b): product split at bit 16 across r3/r4. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        /* high(a) * high(b): added into r4 with the carry going to r5. */
        "ldr	r6, [%[a]]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, #16\n\t"
#else
        "lsr	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        /* high(a) (still in r6) * low(b): product split at bit 16. */
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "# A[] * B - Done\n\t"
        /* Recover the output pointer and store the finished low word. */
        "mov	%[r], r8\n\t"
        "str	r3, [%[r]]\n\t"
        /* Shift the accumulator down one word for the next digit. */
        "movs	r3, r4\n\t"
        "movs	r4, r5\n\t"
        /* Advance the output and input pointers by one word. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[r], %[r], #4\n\t"
#else
        "add	%[r], %[r], #4\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[a], %[a], #4\n\t"
#else
        "add	%[a], %[a], #4\n\t"
#endif
        "mov	r8, %[r]\n\t"
        /* Loop until the input pointer reaches the end address in r9. */
        "cmp	%[a], r9\n\t"
        "blt	L_sp_2048_mul_d_64_%=\n\t"
        /* Store the remaining carry word after the last product word. */
        "str	r3, [%[r]]\n\t"
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
    );
}

#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
/* Compute r = 2^n mod m, where n is the number of bits to reduce by.
 * m is required to be a full 2048-bit value, so one subtraction of m from a
 * zeroed r is all that is needed.
 *
 * r  A single precision number.
 * m  A single precision number.
 */
static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m)
{
    /* Clear all 32 digits of r before subtracting. */
    XMEMSET(r, 0, 32 * sizeof(*r));

    /* r = 2^n mod m */
    (void)sp_2048_sub_in_place_32(r, m);
}

/* Conditionally subtract b from a using the mask m.
 * m is -1 to subtract and 0 when not subtracting (each word of b is ANDed
 * with m, so with m = 0 the words of a are stored to r unchanged).
 *
 * The loop processes 0x80 bytes (32 words). The borrow is kept between words
 * in r4 as 0 (no borrow) or all ones (borrow) and is the value returned.
 *
 * r  A single precision number representing condition subtract result.
 * a  A single precision number to subtract from.
 * b  A single precision number to subtract.
 * m  Mask value to apply.
 */
SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b, sp_digit m)
{
    __asm__ __volatile__ (
        /* r4 = saved borrow (none yet), r8 = 0x80 byte count, r7 = offset. */
        "movs	r4, #0\n\t"
        "movs	r5, #0x80\n\t"
        "mov	r8, r5\n\t"
        "movs	r7, #0\n\t"
        "\n"
    "L_sp_2048_cond_sub_32_words_%=:\n\t"
        /* r6 = b word masked with m. */
        "ldr	r6, [%[b], r7]\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, %[m]\n\t"
#elif defined(__clang__)
        "ands	r6, %[m]\n\t"
#else
        "and	r6, %[m]\n\t"
#endif
        /* 0 - r4 puts the saved borrow back into the carry flag; the add and
         * cmp at the end of the loop overwrite the flags, which is why the
         * borrow is carried in r4 instead. The value in r5 is discarded. */
        "movs	r5, #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r5, r5, r4\n\t"
#else
        "sub	r5, r5, r4\n\t"
#endif
        /* r5 = a word - masked b word - borrow. */
        "ldr	r5, [%[a], r7]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        /* r4 - r4 - borrow: r4 becomes 0 or all ones to save the new borrow. */
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r4\n\t"
#elif defined(__clang__)
        "sbcs	r4, r4\n\t"
#else
        "sbc	r4, r4\n\t"
#endif
        "str	r5, [%[r], r7]\n\t"
        /* Next word; loop while the offset is below 0x80. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r7, r7, #4\n\t"
#else
        "add	r7, r7, #4\n\t"
#endif
        "cmp	r7, r8\n\t"
        "blt	L_sp_2048_cond_sub_32_words_%=\n\t"
        /* Hand the final borrow back through the r operand. */
        "movs	%[r], r4\n\t"
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m)
        :
        : "memory", "r4", "r5", "r6", "r7", "r8", "cc"
    );
    /* r no longer holds a pointer here: it carries the borrow (0 or -1). */
    return (word32)(size_t)r;
}

#define sp_2048_mont_reduce_order_64   sp_2048_mont_reduce_64
/* Reduce the number back to 2048 bits using Montgomery reduction.
 *
 * a   A single precision number to reduce in place.
 * m   The single precision number representing the modulus.
 * mp  The digit representing the negative inverse of m mod 2^n.
 */
SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m,
        sp_digit mp)
{
    __asm__ __volatile__ (
        "movs	r7, #0\n\t"
        "mov	r8, %[mp]\n\t"
        "mov	r12, r7\n\t"
        "mov	lr, %[m]\n\t"
        "mov	r9, %[a]\n\t"
        "mov	r11, %[a]\n\t"
        "movs	r5, #0x7c\n\t"
        "movs	r6, #0x80\n\t"
        "add	r9, r9, r5\n\t"
        "add	r11, r11, r6\n\t"
        "\n"
    "L_sp_2048_mont_reduce_32_mod_%=:\n\t"
        "movs	r7, #0\n\t"
        "movs	r4, #0\n\t"
        "# a[i] += m[0] * mu\n\t"
        "ldm	%[m]!, {%[mp]}\n\t"
        "ldm	%[a]!, {r3}\n\t"
        "# mu = a[i] * mp\n\t"
        "mov	r5, r8\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r3, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r3\n\t"
#else
        "mul	r5, r3\n\t"
#endif
        "mov	r10, r5\n\t"
        "# Multiply m[0] and mu - Start\n\t"
        "mov	r5, r10\n\t"
        "uxth	r6, %[mp]\n\t"
        "uxth	r5, r5\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[mp], #16\n\t"
#else
        "lsr	r6, %[mp], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "mov	r5, r10\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[mp], #16\n\t"
#else
        "lsr	r6, %[mp], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, r5, #16\n\t"
#else
        "lsr	r5, r5, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
        "uxth	r6, %[mp]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "# Multiply m[0] and mu - Done\n\t"
        "\n"
    "L_sp_2048_mont_reduce_32_word_%=:\n\t"
        "# a[i+j] += m[j] * mu\n\t"
        "ldr	r3, [%[a]]\n\t"
        "ldm	%[m]!, {%[mp]}\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r4\n\t"
#else
        "add	r3, r3, r4\n\t"
#endif
        "movs	r4, #0\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
        "# Multiply m[j] and mu - Start\n\t"
        "mov	r5, r10\n\t"
        "uxth	r6, %[mp]\n\t"
        "uxth	r5, r5\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[mp], #16\n\t"
#else
        "lsr	r6, %[mp], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "mov	r5, r10\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[mp], #16\n\t"
#else
        "lsr	r6, %[mp], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, r5, #16\n\t"
#else
        "lsr	r5, r5, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
        "uxth	r6, %[mp]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "# Multiply m[j] and mu - Done\n\t"
        "stm	%[a]!, {r3}\n\t"
        "cmp	%[a], r9\n\t"
        "blt	L_sp_2048_mont_reduce_32_word_%=\n\t"
        "# a[i+31] += m[31] * mu\n\t"
        "ldr	%[mp], [%[m]]\n\t"
        "mov	r3, r12\n\t"
        "# Multiply m[31] and mu - Start\n\t"
        "mov	r5, r10\n\t"
        "uxth	r6, %[mp]\n\t"
        "uxth	r5, r5\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r7, r7, r7\n\t"
#elif defined(__clang__)
        "adcs	r7, r7\n\t"
#else
        "adc	r7, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[mp], #16\n\t"
#else
        "lsr	r6, %[mp], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r7, r7, r7\n\t"
#elif defined(__clang__)
        "adcs	r7, r7\n\t"
#else
        "adc	r7, r7\n\t"
#endif
        "mov	r5, r10\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[mp], #16\n\t"
#else
        "lsr	r6, %[mp], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, r5, #16\n\t"
#else
        "lsr	r5, r5, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r7, r7, r7\n\t"
#elif defined(__clang__)
        "adcs	r7, r7\n\t"
#else
        "adc	r7, r7\n\t"
#endif
        "uxth	r6, %[mp]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r7, r7, r7\n\t"
#elif defined(__clang__)
        "adcs	r7, r7\n\t"
#else
        "adc	r7, r7\n\t"
#endif
        "# Multiply m[31] and mu - Done\n\t"
        "ldr	r5, [%[a]]\n\t"
        "ldr	r6, [%[a], #4]\n\t"
        "movs	%[mp], #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r4\n\t"
#else
        "add	r5, r5, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r6, r6, r3\n\t"
#elif defined(__clang__)
        "adcs	r6, r3\n\t"
#else
        "adc	r6, r3\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r7, r7, %[mp]\n\t"
#elif defined(__clang__)
        "adcs	r7, %[mp]\n\t"
#else
        "adc	r7, %[mp]\n\t"
#endif
        "stm	%[a]!, {r5, r6}\n\t"
        "# i += 1\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[a], %[a], #4\n\t"
#else
        "sub	%[a], %[a], #4\n\t"
#endif
        "movs	r3, #0x7c\n\t"
        "mov	r9, %[a]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[a], %[a], r3\n\t"
#else
        "sub	%[a], %[a], r3\n\t"
#endif
        "mov	r12, r7\n\t"
        "mov	%[m], lr\n\t"
        "cmp	r11, %[a]\n\t"
        "bgt	L_sp_2048_mont_reduce_32_mod_%=\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "negs	r7, r7\n\t"
#else
        "neg	r7, r7\n\t"
#endif
        "# Subtract masked modulus\n\t"
        "movs	r4, #0x80\n\t"
        "movs	%[mp], #0\n\t"
        "movs	r3, #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[a], %[a], r4\n\t"
#else
        "sub	%[a], %[a], r4\n\t"
#endif
#ifndef WOLFSSL_SP_LARGE_CODE
        "\n"
    "L_sp_2048_mont_reduce_32_sub_mask_%=:\n\t"
        "ldm	%[m]!, {r6}\n\t"
        "movs	r5, #0\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r5, r5, %[mp]\n\t"
#else
        "sub	r5, r5, %[mp]\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	%[mp], %[mp], %[mp]\n\t"
#elif defined(__clang__)
        "sbcs	%[mp], %[mp]\n\t"
#else
        "sbc	%[mp], %[mp]\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, #4\n\t"
#else
        "add	r3, r3, #4\n\t"
#endif
        "cmp	r3, r4\n\t"
        "blt	L_sp_2048_mont_reduce_32_sub_mask_%=\n\t"
#else /* WOLFSSL_SP_LARGE_CODE */
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r5, r5, r6\n\t"
#else
        "sub	r5, r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
#endif /* WOLFSSL_SP_LARGE_CODE */
        : [a] "+l" (a), [m] "+l" (m), [mp] "+l" (mp)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

/* Montgomery multiplication. (r = a * b mod m)
 *
 * Done in two stages: the operands are multiplied into r, and r is then
 * Montgomery reduced in place with respect to m.
 *
 * r   Receives the result; also the working buffer for the product.
 * a   First operand, in Montgomery form.
 * b   Second operand, in Montgomery form.
 * m   Modulus.
 * mp  Montgomery multiplier handed on to the reduction.
 */
SP_NOINLINE static void sp_2048_mont_mul_32(sp_digit* r,
    const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp)
{
    /* Stage 1: multiply a by b into r. */
    sp_2048_mul_32(r, a, b);

    /* Stage 2: reduce r in place. */
    sp_2048_mont_reduce_32(r, m, mp);
}

/* Montgomery squaring. (r = a * a mod m)
 *
 * Done in two stages: a is squared into r, and r is then Montgomery reduced
 * in place with respect to m.
 *
 * r   Receives the result; also the working buffer for the square.
 * a   Operand to square, in Montgomery form.
 * m   Modulus.
 * mp  Montgomery multiplier handed on to the reduction.
 */
SP_NOINLINE static void sp_2048_mont_sqr_32(sp_digit* r,
    const sp_digit* a, const sp_digit* m, sp_digit mp)
{
    /* Stage 1: square a into r. */
    sp_2048_sqr_32(r, a);

    /* Stage 2: reduce r in place. */
    sp_2048_mont_reduce_32(r, m, mp);
}

/* Mul a by digit b into r. (r = a * b)
 *
 * Walks the 32 words of a (0x80 bytes). Each 32x32-bit product is built from
 * four 16x16-bit multiplies and accumulated into a three register running
 * sum; the lowest register is stored and the upper two are carried into the
 * next word. After the loop one more word (the remaining carry) is stored,
 * so 33 words are written through r in total.
 *
 * Register use inside the asm:
 *   r3, r4, r5  running sum, low to high
 *   r6, r7      scratch for the 16-bit halves and partial products
 *   r8          current output pointer (%[r] doubles as a zero constant
 *               inside the loop, so the pointer is parked here)
 *   r9          a + 0x80, the end-of-input address
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision digit.
 */
SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a,
        sp_digit b)
{
    __asm__ __volatile__ (
        /* r9 = a + 0x80 (end of input), r8 = r (output pointer). */
        "movs	r6, #0x80\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r6, r6, %[a]\n\t"
#else
        "add	r6, r6, %[a]\n\t"
#endif
        "mov	r8, %[r]\n\t"
        "mov	r9, r6\n\t"
        /* Clear the running sum carried between words. */
        "movs	r3, #0\n\t"
        "movs	r4, #0\n\t"
        "\n"
    "L_sp_2048_mul_d_32_%=:\n\t"
        /* %[r] is used as a zero register for the carry-only adcs below. */
        "movs	%[r], #0\n\t"
        "movs	r5, #0\n\t"
        "# A[] * B\n\t"
        /* Halfword at the word's address (the low 16 bits on a little-endian
         * target) times the low 16 bits of b, added at bit 0. */
        "ldrh	r6, [%[a]]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        /* Same halfword of a times the high 16 bits of b; the product is
         * split so that it is added at bit 16. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        /* Upper 16 bits of the word of a times the high 16 bits of b, added
         * one word up (into r4). */
        "ldr	r6, [%[a]]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, #16\n\t"
#else
        "lsr	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        /* Upper 16 bits of the word of a times the low 16 bits of b; split
         * and added at bit 16. */
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "# A[] * B - Done\n\t"
        /* Restore the output pointer, store the finished word and shift the
         * running sum down by one word for the next iteration. */
        "mov	%[r], r8\n\t"
        "str	r3, [%[r]]\n\t"
        "movs	r3, r4\n\t"
        "movs	r4, r5\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[r], %[r], #4\n\t"
#else
        "add	%[r], %[r], #4\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[a], %[a], #4\n\t"
#else
        "add	%[a], %[a], #4\n\t"
#endif
        "mov	r8, %[r]\n\t"
        /* Loop until a reaches the end address in r9.
         * NOTE(review): blt is a signed comparison applied to addresses;
         * confirm the input buffer can never straddle 0x80000000. */
        "cmp	%[a], r9\n\t"
        "blt	L_sp_2048_mul_d_32_%=\n\t"
        /* Store the remaining carry as the extra top word of the result. */
        "str	r3, [%[r]]\n\t"
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
    );
}

/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
 *
 * Outline of the asm below:
 *   1. 31 compare/subtract steps against (div >> 1) + 1 build an estimate in
 *      r3, one bit per step: one step on d1 as passed in, then 30 loop
 *      iterations that each shift the next bit of d0 into d1.
 *   2. The estimate is doubled and has 1 added.
 *   3. Four correction rounds each multiply the estimate by div (64-bit
 *      product from 16x16-bit multiplies), subtract the product from the
 *      original d1|d0 and add the high word of the difference to the
 *      estimate.
 *   4. One is added if the low word of the last difference is greater than
 *      div.
 * The only branch is the loop counter test; selections are done with masks.
 *
 * Register use inside the asm:
 *   r3      quotient estimate
 *   r4, r5  loop counter / 64-bit product and difference (low, high)
 *   r6      mask and scratch
 *   r7      zero constant for carry-only adds
 *   r8, r9  saved copies of the original d0 and d1
 *
 * d1   The high order half of the number to divide.
 * d0   The low order half of the number to divide.
 * div  The divisor.
 * returns the result of the division.
 *
 * Note that this is an approximate div. It may give an answer 1 larger.
 */
SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0,
        sp_digit div)
{
    __asm__ __volatile__ (
        /* r3 = estimate = 0, r5 = (div >> 1) + 1, r8/r9 = original d0/d1. */
        "movs	r3, #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[div], #1\n\t"
#else
        "lsr	r5, %[div], #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, #1\n\t"
#else
        "add	r5, r5, #1\n\t"
#endif
        "mov	r8, %[d0]\n\t"
        "mov	r9, %[d1]\n\t"
        "# Do top 32\n\t"
        /* r6 = all ones when d1 > r5 (borrow from r5 - d1), otherwise 0. */
        "movs	r6, r5\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r6, r6, %[d1]\n\t"
#else
        "sub	r6, r6, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r6\n\t"
#elif defined(__clang__)
        "sbcs	r6, r6\n\t"
#else
        "sbc	r6, r6\n\t"
#endif
        /* Shift the estimate left and bring in the new bit (r3 - (-1)). */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r3\n\t"
#else
        "add	r3, r3, r3\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r3, r3, r6\n\t"
#else
        "sub	r3, r3, r6\n\t"
#endif
        /* Subtract r5 from d1 only when the mask is set. */
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r5\n\t"
#elif defined(__clang__)
        "ands	r6, r5\n\t"
#else
        "and	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[d1], %[d1], r6\n\t"
#else
        "sub	%[d1], %[d1], r6\n\t"
#endif
        /* Loop counter: runs for r4 = 29 down to 0, i.e. 30 iterations. */
        "movs	r4, #29\n\t"
        "\n"
    "L_div_2048_word_32_loop_%=:\n\t"
        /* Shift d1|d0 left by one bit: top bit of d0 moves into d1. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d0], %[d0], #1\n\t"
#else
        "lsl	%[d0], %[d0], #1\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	%[d1], %[d1], %[d1]\n\t"
#elif defined(__clang__)
        "adcs	%[d1], %[d1]\n\t"
#else
        "adc	%[d1], %[d1]\n\t"
#endif
        /* Same masked compare/subtract step as above. */
        "movs	r6, r5\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r6, r6, %[d1]\n\t"
#else
        "sub	r6, r6, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r6\n\t"
#elif defined(__clang__)
        "sbcs	r6, r6\n\t"
#else
        "sbc	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r3\n\t"
#else
        "add	r3, r3, r3\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r3, r3, r6\n\t"
#else
        "sub	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r5\n\t"
#elif defined(__clang__)
        "ands	r6, r5\n\t"
#else
        "and	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[d1], %[d1], r6\n\t"
#else
        "sub	%[d1], %[d1], r6\n\t"
#endif
        /* Count down; the loop ends once r4 goes negative. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r4, r4, #1\n\t"
#else
        "sub	r4, r4, #1\n\t"
#endif
        "bpl	L_div_2048_word_32_loop_%=\n\t"
        /* r7 = 0 for carry-only adds; estimate = 2 * estimate + 1. */
        "movs	r7, #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r3\n\t"
#else
        "add	r3, r3, r3\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, #1\n\t"
#else
        "add	r3, r3, #1\n\t"
#endif
        /* Correction round 1: r5|r4 = r3 * div from four 16x16 products. */
        "# r * div - Start\n\t"
        /* low(r3) * low(div) into r4. */
        "uxth	%[d1], r3\n\t"
        "uxth	r4, %[div]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r4, %[d1], r4\n\t"
#elif defined(__clang__)
        "muls	r4, %[d1]\n\t"
#else
        "mul	r4, %[d1]\n\t"
#endif
        /* low(r3) * high(div), split and added at bit 16. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[div], #16\n\t"
#else
        "lsr	r6, %[div], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	%[d1], r6, %[d1]\n\t"
#elif defined(__clang__)
        "muls	%[d1], r6\n\t"
#else
        "mul	%[d1], r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[d1], #16\n\t"
#else
        "lsr	r5, %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d1], %[d1], #16\n\t"
#else
        "lsl	%[d1], %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, %[d1]\n\t"
#else
        "add	r4, r4, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
        /* high(r3) * high(div) added to the high word. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	%[d1], r3, #16\n\t"
#else
        "lsr	%[d1], r3, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, %[d1], r6\n\t"
#elif defined(__clang__)
        "muls	r6, %[d1]\n\t"
#else
        "mul	r6, %[d1]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
        /* high(r3) * low(div), split and added at bit 16. */
        "uxth	r6, %[div]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	%[d1], r6, %[d1]\n\t"
#elif defined(__clang__)
        "muls	%[d1], r6\n\t"
#else
        "mul	%[d1], r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[d1], #16\n\t"
#else
        "lsr	r6, %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d1], %[d1], #16\n\t"
#else
        "lsl	%[d1], %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, %[d1]\n\t"
#else
        "add	r4, r4, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "adcs	r5, r6\n\t"
#else
        "adc	r5, r6\n\t"
#endif
        "# r * div - Done\n\t"
        /* Difference = original (r9|r8) - product (r5|r4). The borrow from
         * the low-word subtract is consumed by the sbc below, so the moves in
         * between must leave the carry flag untouched. */
        "mov	%[d1], r8\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[d1], %[d1], r4\n\t"
#else
        "sub	%[d1], %[d1], r4\n\t"
#endif
        "movs	r4, %[d1]\n\t"
        "mov	%[d1], r9\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	%[d1], %[d1], r5\n\t"
#elif defined(__clang__)
        "sbcs	%[d1], r5\n\t"
#else
        "sbc	%[d1], r5\n\t"
#endif
        /* Add the high word of the difference to the estimate. */
        "movs	r5, %[d1]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
        /* Correction round 2: same product sequence as round 1. */
        "# r * div - Start\n\t"
        "uxth	%[d1], r3\n\t"
        "uxth	r4, %[div]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r4, %[d1], r4\n\t"
#elif defined(__clang__)
        "muls	r4, %[d1]\n\t"
#else
        "mul	r4, %[d1]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[div], #16\n\t"
#else
        "lsr	r6, %[div], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	%[d1], r6, %[d1]\n\t"
#elif defined(__clang__)
        "muls	%[d1], r6\n\t"
#else
        "mul	%[d1], r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[d1], #16\n\t"
#else
        "lsr	r5, %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d1], %[d1], #16\n\t"
#else
        "lsl	%[d1], %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, %[d1]\n\t"
#else
        "add	r4, r4, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	%[d1], r3, #16\n\t"
#else
        "lsr	%[d1], r3, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, %[d1], r6\n\t"
#elif defined(__clang__)
        "muls	r6, %[d1]\n\t"
#else
        "mul	r6, %[d1]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
        "uxth	r6, %[div]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	%[d1], r6, %[d1]\n\t"
#elif defined(__clang__)
        "muls	%[d1], r6\n\t"
#else
        "mul	%[d1], r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[d1], #16\n\t"
#else
        "lsr	r6, %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d1], %[d1], #16\n\t"
#else
        "lsl	%[d1], %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, %[d1]\n\t"
#else
        "add	r4, r4, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "adcs	r5, r6\n\t"
#else
        "adc	r5, r6\n\t"
#endif
        "# r * div - Done\n\t"
        /* r6|r4 = original (r9|r8) - product (r5|r4); add the high word of
         * the difference to the estimate. */
        "mov	%[d1], r8\n\t"
        "mov	r6, r9\n\t"
#ifdef WOLFSSL_KEIL
        "subs	r4, %[d1], r4\n\t"
#else
#ifdef __clang__
        "subs	r4, %[d1], r4\n\t"
#else
        "sub	r4, %[d1], r4\n\t"
#endif
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r5\n\t"
#elif defined(__clang__)
        "sbcs	r6, r5\n\t"
#else
        "sbc	r6, r5\n\t"
#endif
        "movs	r5, r6\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
        /* Correction round 3: same product sequence as round 1. */
        "# r * div - Start\n\t"
        "uxth	%[d1], r3\n\t"
        "uxth	r4, %[div]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r4, %[d1], r4\n\t"
#elif defined(__clang__)
        "muls	r4, %[d1]\n\t"
#else
        "mul	r4, %[d1]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[div], #16\n\t"
#else
        "lsr	r6, %[div], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	%[d1], r6, %[d1]\n\t"
#elif defined(__clang__)
        "muls	%[d1], r6\n\t"
#else
        "mul	%[d1], r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[d1], #16\n\t"
#else
        "lsr	r5, %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d1], %[d1], #16\n\t"
#else
        "lsl	%[d1], %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, %[d1]\n\t"
#else
        "add	r4, r4, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	%[d1], r3, #16\n\t"
#else
        "lsr	%[d1], r3, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, %[d1], r6\n\t"
#elif defined(__clang__)
        "muls	r6, %[d1]\n\t"
#else
        "mul	r6, %[d1]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
        "uxth	r6, %[div]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	%[d1], r6, %[d1]\n\t"
#elif defined(__clang__)
        "muls	%[d1], r6\n\t"
#else
        "mul	%[d1], r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[d1], #16\n\t"
#else
        "lsr	r6, %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d1], %[d1], #16\n\t"
#else
        "lsl	%[d1], %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, %[d1]\n\t"
#else
        "add	r4, r4, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "adcs	r5, r6\n\t"
#else
        "adc	r5, r6\n\t"
#endif
        "# r * div - Done\n\t"
        /* r6|r4 = original (r9|r8) - product (r5|r4); add the high word of
         * the difference to the estimate. */
        "mov	%[d1], r8\n\t"
        "mov	r6, r9\n\t"
#ifdef WOLFSSL_KEIL
        "subs	r4, %[d1], r4\n\t"
#else
#ifdef __clang__
        "subs	r4, %[d1], r4\n\t"
#else
        "sub	r4, %[d1], r4\n\t"
#endif
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r5\n\t"
#elif defined(__clang__)
        "sbcs	r6, r5\n\t"
#else
        "sbc	r6, r5\n\t"
#endif
        "movs	r5, r6\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
        /* Correction round 4: same product sequence as round 1. */
        "# r * div - Start\n\t"
        "uxth	%[d1], r3\n\t"
        "uxth	r4, %[div]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r4, %[d1], r4\n\t"
#elif defined(__clang__)
        "muls	r4, %[d1]\n\t"
#else
        "mul	r4, %[d1]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[div], #16\n\t"
#else
        "lsr	r6, %[div], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	%[d1], r6, %[d1]\n\t"
#elif defined(__clang__)
        "muls	%[d1], r6\n\t"
#else
        "mul	%[d1], r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[d1], #16\n\t"
#else
        "lsr	r5, %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d1], %[d1], #16\n\t"
#else
        "lsl	%[d1], %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, %[d1]\n\t"
#else
        "add	r4, r4, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	%[d1], r3, #16\n\t"
#else
        "lsr	%[d1], r3, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, %[d1], r6\n\t"
#elif defined(__clang__)
        "muls	r6, %[d1]\n\t"
#else
        "mul	r6, %[d1]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
        "uxth	r6, %[div]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	%[d1], r6, %[d1]\n\t"
#elif defined(__clang__)
        "muls	%[d1], r6\n\t"
#else
        "mul	%[d1], r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[d1], #16\n\t"
#else
        "lsr	r6, %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d1], %[d1], #16\n\t"
#else
        "lsl	%[d1], %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, %[d1]\n\t"
#else
        "add	r4, r4, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "adcs	r5, r6\n\t"
#else
        "adc	r5, r6\n\t"
#endif
        "# r * div - Done\n\t"
        /* r6|r4 = original (r9|r8) - product (r5|r4); add the high word of
         * the difference to the estimate. r4 is kept for the final check. */
        "mov	%[d1], r8\n\t"
        "mov	r6, r9\n\t"
#ifdef WOLFSSL_KEIL
        "subs	r4, %[d1], r4\n\t"
#else
#ifdef __clang__
        "subs	r4, %[d1], r4\n\t"
#else
        "sub	r4, %[d1], r4\n\t"
#endif
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r5\n\t"
#elif defined(__clang__)
        "sbcs	r6, r5\n\t"
#else
        "sbc	r6, r5\n\t"
#endif
        "movs	r5, r6\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
        /* Final adjustment: r6 = all ones when r4 > div (borrow from
         * div - r4), so subtracting it adds 1 to the estimate in that case. */
        "movs	r6, %[div]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r6, r6, r4\n\t"
#else
        "sub	r6, r6, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r6\n\t"
#elif defined(__clang__)
        "sbcs	r6, r6\n\t"
#else
        "sbc	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r3, r3, r6\n\t"
#else
        "sub	r3, r3, r6\n\t"
#endif
        /* Hand the result back through the d1 operand. */
        "movs	%[d1], r3\n\t"
        : [d1] "+l" (d1), [d0] "+l" (d0), [div] "+l" (div)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
    );
    return (word32)(size_t)d1;
}

/* Compare a with b in constant time.
 *
 * Every one of the 32 words is visited, from byte offset 0x7c down to 0.
 * A mask (r3) starts as all ones and is cleared at the first word where a
 * and b differ; from then on both loaded words are masked to zero, so the
 * remaining words no longer change the result. The result (r2) ends up as
 * -1, 0 or 1 and is returned through the a operand.
 *
 * a  A single precision integer.
 * b  A single precision integer.
 * return -ve, 0 or +ve if a is less than, equal to or greater than b
 * respectively.
 */
SP_NOINLINE static sp_int32 sp_2048_cmp_32(const sp_digit* a, const sp_digit* b)
{
    __asm__ __volatile__ (
        /* r2 = result = 0, r3 = mask = all ones. */
        "movs	r2, #0\n\t"
        "movs	r3, #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "mvns	r3, r3\n\t"
#else
        "mvn	r3, r3\n\t"
#endif
        /* r6 = byte offset of the last word (31 * 4). */
        "movs	r6, #0x7c\n\t"
        "\n"
    "L_sp_2048_cmp_32_words_%=:\n\t"
        /* Load the current words and zero them once a difference was seen. */
        "ldr	r7, [%[a], r6]\n\t"
        "ldr	r5, [%[b], r6]\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r7, r7, r3\n\t"
#elif defined(__clang__)
        "ands	r7, r3\n\t"
#else
        "and	r7, r3\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "ands	r5, r5, r3\n\t"
#elif defined(__clang__)
        "ands	r5, r3\n\t"
#else
        "and	r5, r3\n\t"
#endif
        /* Keep a copy of the a word for the second comparison. */
        "movs	r4, r7\n\t"
        /* r7 = all ones when a word < b word (borrow), otherwise 0. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r7, r7, r5\n\t"
#else
        "sub	r7, r7, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r7, r7, r7\n\t"
#elif defined(__clang__)
        "sbcs	r7, r7\n\t"
#else
        "sbc	r7, r7\n\t"
#endif
        /* result += -1 when less; then clear the mask in that case. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r7\n\t"
#else
        "add	r2, r2, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "mvns	r7, r7\n\t"
#else
        "mvn	r7, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "ands	r3, r3, r7\n\t"
#elif defined(__clang__)
        "ands	r3, r7\n\t"
#else
        "and	r3, r7\n\t"
#endif
        /* r7 = all ones when b word < a word (borrow), otherwise 0. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r5, r5, r4\n\t"
#else
        "sub	r5, r5, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r7, r7, r7\n\t"
#elif defined(__clang__)
        "sbcs	r7, r7\n\t"
#else
        "sbc	r7, r7\n\t"
#endif
        /* result -= -1 when greater; then clear the mask in that case. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r2, r2, r7\n\t"
#else
        "sub	r2, r2, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "mvns	r7, r7\n\t"
#else
        "mvn	r7, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "ands	r3, r3, r7\n\t"
#elif defined(__clang__)
        "ands	r3, r7\n\t"
#else
        "and	r3, r7\n\t"
#endif
        /* Step down one word; loop while the offset is still >= 0. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r6, r6, #4\n\t"
#else
        "sub	r6, r6, #4\n\t"
#endif
        "bge	L_sp_2048_cmp_32_words_%=\n\t"
        /* Hand the result back through the a operand. */
        "movs	%[a], r2\n\t"
        : [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc"
    );
    return (word32)(size_t)a;
}

/* Divide a by d and put the remainder into r (m*d + r = a)
 * m is not calculated as it is not needed at this time.
 *
 * a is read as a 64 word number and d as a 32 word number. The work is done
 * on a local copy of a, so a itself is not modified.
 *
 * a  Number to be divided.
 * d  Number to divide with.
 * m  Multiplier result. Unused - may be NULL.
 * r  Remainder from the division.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d,
        sp_digit* m, sp_digit* r)
{
    sp_digit t1[64], t2[33];
    sp_digit div, r1;
    int i;

    (void)m;

    /* Top word of the divisor is used to estimate each quotient word. */
    div = d[31];
    XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
    /* Bring the top half below d before starting the word-by-word steps. */
    r1 = sp_2048_cmp_32(&t1[32], d) >= 0;
    sp_2048_cond_sub_32(&t1[32], &t1[32], d, (sp_digit)0 - r1);
    for (i = 31; i >= 0; i--) {
        /* mask is all ones when the top word equals div. In that case the
         * high word passed to the word division is reduced by one and the
         * quotient estimate is forced to all ones by the OR below. */
        volatile sp_digit mask = (sp_digit)0 - (t1[32 + i] == div);
        sp_digit hi = t1[32 + i] + mask;
        r1 = div_2048_word_32(hi, t1[32 + i - 1], div);
        r1 |= mask;

        /* Subtract estimate * d from the current 33 word window. */
        sp_2048_mul_d_32(t2, d, r1);
        t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
        t1[32 + i] -= t2[32];
        /* Two correction steps: d masked with the top word is added back
         * each time. NOTE(review): presumably the top word is all ones
         * when the estimate was too large and zero otherwise - confirm
         * against sp_2048_mask_32. */
        sp_2048_mask_32(t2, d, t1[32 + i]);
        t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
        sp_2048_mask_32(t2, d, t1[32 + i]);
        t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
    }

    /* Final conditional subtract so that the value written to r is < d. */
    r1 = sp_2048_cmp_32(t1, d) >= 0;
    sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);

    return MP_OKAY;
}

/* Compute the remainder of a divided by m. (r = a mod m)
 *
 * Thin wrapper around sp_2048_div_32() that discards the quotient.
 *
 * r  Receives the reduced result.
 * a  Number to reduce.
 * m  Modulus to reduce with.
 * returns the result of sp_2048_div_32() (MP_OKAY on success).
 */
static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
    /* No quotient buffer is supplied: only the remainder is wanted. */
    return sp_2048_div_32(a, m, NULL, r);
}

#ifdef WOLFSSL_SP_SMALL
/* Modular exponentiate a to the e mod m. (r = a^e mod m)
 *
 * The exponent is consumed from its most significant end in fixed 4-bit
 * windows. A table of 16 entries is built first: t[0] is the value produced
 * by sp_2048_mont_norm_32() and t[1]..t[15] are successive powers of a
 * computed with the Montgomery multiply/square helpers.
 *
 * r        A single precision number that is the result of the operation.
 *          Must have room for 64 digits: digits 32..63 are cleared and used
 *          by the final Montgomery reduction.
 * a        A single precision number being exponentiated.
 * e        A single precision number that is the exponent.
 * bits     The number of bits in the exponent.
 * m        A single precision number that is the modulus.
 * reduceA  When non-zero, a is reduced modulo m before use.
 * returns  0 on success.
 * returns  MEMORY_E on dynamic memory allocation failure.
 * returns  MP_VAL when bits is 0.
 */
static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
        int bits, const sp_digit* m, int reduceA)
{
    SP_DECL_VAR(sp_digit, td, 16 * 64);
    sp_digit* t[16];
    sp_digit* norm = NULL;
    sp_digit mp = 1;
    sp_digit n;
    sp_digit mask;
    int i;
    int c;
    byte y;
    int err = MP_OKAY;

    if (bits == 0) {
        err = MP_VAL;
    }

    SP_ALLOC_VAR(sp_digit, td, 16 * 64, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    if (err == MP_OKAY) {
        /* Each table entry is 64 digits wide; norm shares t[0]. */
        norm = td;
        for (i=0; i<16; i++) {
            t[i] = td + i * 64;
        }

        sp_2048_mont_setup(m, &mp);
        sp_2048_mont_norm_32(norm, m);

        /* Place a in the upper 32 digits of t[1] above 32 zero digits and
         * reduce the 64 digit value modulo m. */
        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
        if (reduceA != 0) {
            err = sp_2048_mod_32(t[1] + 32, a, m);
            if (err == MP_OKAY) {
                err = sp_2048_mod_32(t[1], t[1], m);
            }
        }
        else {
            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
            err = sp_2048_mod_32(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        /* Fill in the remaining table entries: t[k] from smaller entries. */
        sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
        sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
        sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
        sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
        sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
        sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
        sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
        sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
        sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
        sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
        sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
        sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
        sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
        sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);

        /* n holds the exponent word being consumed (next bits at the top),
         * c the number of unconsumed bits left in n, i the index of the
         * next word to load. The first window takes the bits % 4 leftover
         * bits so that the rest of the exponent splits into whole windows. */
        i = (bits - 1) / 32;
        n = e[i--];
        c = bits & 31;
        if (c == 0) {
            c = 32;
        }
        c -= bits % 4;
        if (c == 32) {
            c = 28;
        }
        if (c < 0) {
            /* Number of bits in top word is less than number needed. */
            /* Digits are 32 bits wide, so the bits borrowed from the next
             * word are its top c bits and 32 - c bits remain afterwards.
             * Shifting a 32-bit digit by 64 - c would be undefined. */
            c = -c;
            y = (byte)(n << c);
            n = e[i--];
            y |= (byte)(n >> (32 - c));
            n <<= c;
            c = 32 - c;
        }
        else if (c == 0) {
            /* All bits in top word used. */
            y = (byte)n;
        }
        else {
            y = (byte)(n >> c);
            n <<= 32 - c;
        }
        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
        for (; i>=0 || c>=4; ) {
            if (c == 0) {
                /* Current word exhausted: take a whole window from the
                 * next word. */
                n = e[i--];
                y = (byte)(n >> 28);
                n <<= 4;
                c = 28;
            }
            else if (c < 4) {
                /* Window straddles two exponent words. */
                y = (byte)(n >> 28);
                n = e[i--];
                c = 4 - c;
                y |= (byte)(n >> (32 - c));
                n <<= c;
                c = 32 - c;
            }
            else {
                y = (byte)((n >> 28) & 0xf);
                n <<= 4;
                c -= 4;
            }

            /* Four squarings for the window, then multiply by the entry. */
            sp_2048_mont_sqr_32(r, r, m, mp);
            sp_2048_mont_sqr_32(r, r, m, mp);
            sp_2048_mont_sqr_32(r, r, m, mp);
            sp_2048_mont_sqr_32(r, r, m, mp);

            sp_2048_mont_mul_32(r, r, t[y], m, mp);
        }

        /* Clear the upper half and run a final Montgomery reduction. */
        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
        sp_2048_mont_reduce_32(r, m, mp);

        /* Subtract m once more if the result is still >= m. */
        mask = (sp_digit)0 - (sp_2048_cmp_32(r, m) >= 0);
        sp_2048_cond_sub_32(r, r, m, mask);
    }

    SP_FREE_VAR(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);

    return err;
}
#else
/* Modular exponentiate a to the e mod m. (r = a^e mod m)
 *
 * The exponent is consumed from its most significant end in fixed 5-bit
 * windows. A table of 32 entries is built first: t[0] is the value produced
 * by sp_2048_mont_norm_32() and t[1]..t[31] are successive powers of a
 * computed with the Montgomery multiply/square helpers.
 *
 * r        A single precision number that is the result of the operation.
 *          Must have room for 64 digits: digits 32..63 are cleared and used
 *          by the final Montgomery reduction.
 * a        A single precision number being exponentiated.
 * e        A single precision number that is the exponent.
 * bits     The number of bits in the exponent.
 * m        A single precision number that is the modulus.
 * reduceA  When non-zero, a is reduced modulo m before use.
 * returns  0 on success.
 * returns  MEMORY_E on dynamic memory allocation failure.
 * returns  MP_VAL when bits is 0.
 */
static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
        int bits, const sp_digit* m, int reduceA)
{
    SP_DECL_VAR(sp_digit, td, 32 * 64);
    sp_digit* t[32];
    sp_digit* norm = NULL;
    sp_digit mp = 1;
    sp_digit n;
    sp_digit mask;
    int i;
    int c;
    byte y;
    int err = MP_OKAY;

    if (bits == 0) {
        err = MP_VAL;
    }

    SP_ALLOC_VAR(sp_digit, td, 32 * 64, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    if (err == MP_OKAY) {
        /* Each table entry is 64 digits wide; norm shares t[0]. */
        norm = td;
        for (i=0; i<32; i++) {
            t[i] = td + i * 64;
        }

        sp_2048_mont_setup(m, &mp);
        sp_2048_mont_norm_32(norm, m);

        /* Place a in the upper 32 digits of t[1] above 32 zero digits and
         * reduce the 64 digit value modulo m. */
        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
        if (reduceA != 0) {
            err = sp_2048_mod_32(t[1] + 32, a, m);
            if (err == MP_OKAY) {
                err = sp_2048_mod_32(t[1], t[1], m);
            }
        }
        else {
            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
            err = sp_2048_mod_32(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        /* Fill in the remaining table entries: t[k] from smaller entries. */
        sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
        sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
        sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
        sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
        sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
        sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
        sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
        sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
        sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
        sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
        sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
        sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
        sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
        sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
        sp_2048_mont_sqr_32(t[16], t[ 8], m, mp);
        sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp);
        sp_2048_mont_sqr_32(t[18], t[ 9], m, mp);
        sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp);
        sp_2048_mont_sqr_32(t[20], t[10], m, mp);
        sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp);
        sp_2048_mont_sqr_32(t[22], t[11], m, mp);
        sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp);
        sp_2048_mont_sqr_32(t[24], t[12], m, mp);
        sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp);
        sp_2048_mont_sqr_32(t[26], t[13], m, mp);
        sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp);
        sp_2048_mont_sqr_32(t[28], t[14], m, mp);
        sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp);
        sp_2048_mont_sqr_32(t[30], t[15], m, mp);
        sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp);

        /* n holds the exponent word being consumed (next bits at the top),
         * c the number of unconsumed bits left in n, i the index of the
         * next word to load. The first window takes the bits % 5 leftover
         * bits so that the rest of the exponent splits into whole windows. */
        i = (bits - 1) / 32;
        n = e[i--];
        c = bits & 31;
        if (c == 0) {
            c = 32;
        }
        c -= bits % 5;
        if (c == 32) {
            c = 27;
        }
        if (c < 0) {
            /* Number of bits in top word is less than number needed. */
            /* Reached e.g. for bits = 33 (1 bit in the top word, 3 needed).
             * Digits are 32 bits wide, so the bits borrowed from the next
             * word are its top c bits and 32 - c bits remain afterwards.
             * Shifting a 32-bit digit by 64 - c would be undefined and
             * would leave a wrong remaining-bit count. */
            c = -c;
            y = (byte)(n << c);
            n = e[i--];
            y |= (byte)(n >> (32 - c));
            n <<= c;
            c = 32 - c;
        }
        else if (c == 0) {
            /* All bits in top word used. */
            y = (byte)n;
        }
        else {
            y = (byte)(n >> c);
            n <<= 32 - c;
        }
        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
        for (; i>=0 || c>=5; ) {
            if (c == 0) {
                /* Current word exhausted: take a whole window from the
                 * next word. */
                n = e[i--];
                y = (byte)(n >> 27);
                n <<= 5;
                c = 27;
            }
            else if (c < 5) {
                /* Window straddles two exponent words. */
                y = (byte)(n >> 27);
                n = e[i--];
                c = 5 - c;
                y |= (byte)(n >> (32 - c));
                n <<= c;
                c = 32 - c;
            }
            else {
                y = (byte)((n >> 27) & 0x1f);
                n <<= 5;
                c -= 5;
            }

            /* Five squarings for the window, then multiply by the entry. */
            sp_2048_mont_sqr_32(r, r, m, mp);
            sp_2048_mont_sqr_32(r, r, m, mp);
            sp_2048_mont_sqr_32(r, r, m, mp);
            sp_2048_mont_sqr_32(r, r, m, mp);
            sp_2048_mont_sqr_32(r, r, m, mp);

            sp_2048_mont_mul_32(r, r, t[y], m, mp);
        }

        /* Clear the upper half and run a final Montgomery reduction. */
        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
        sp_2048_mont_reduce_32(r, m, mp);

        /* Subtract m once more if the result is still >= m. */
        mask = (sp_digit)0 - (sp_2048_cmp_32(r, m) >= 0);
        sp_2048_cond_sub_32(r, r, m, mask);
    }

    SP_FREE_VAR(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);

    return err;
}
#endif /* WOLFSSL_SP_SMALL */

#endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */

#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
/* Compute the Montgomery normalizer: r = 2^n mod m, where n is the number
 * of bits to reduce by.
 * Because m is required to be a full 2048 bits, one subtraction of m from
 * zero is all that is needed.
 *
 * r  A single precision number that receives the result (64 digits).
 * m  A single precision number that is the modulus.
 */
static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m)
{
    /* Start from zero ... */
    XMEMSET(r, 0, sizeof(*r) * 64);

    /* ... and subtract the modulus in place. */
    sp_2048_sub_in_place_64(r, m);
}

#endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */
/* Conditionally subtract b from a using the mask m.
 * m is -1 to subtract and 0 when not subtracting.
 *
 * Every word of b is ANDed with m before being subtracted, so the same
 * instruction sequence runs for all 64 words whatever the value of m.
 *
 * Register use inside the assembly:
 *   r4  borrow carried between words, held as 0 or -1.
 *   r7  byte offset of the current word.
 *   r8  byte length of the numbers (0x100 = 64 words of 4 bytes).
 *
 * r  A single precision number representing condition subtract result.
 * a  A single precision number to subtract from.
 * b  A single precision number to subtract.
 * m  Mask value to apply.
 * return  the final borrow: 0 when there was none, all ones otherwise.
 */
SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b, sp_digit m)
{
    __asm__ __volatile__ (
        /* r4 = 0 (no borrow yet); r8 = 0xff + 1 = 0x100 (end offset). */
        "movs	r4, #0\n\t"
        "movs	r5, #0xff\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, #1\n\t"
#else
        "add	r5, r5, #1\n\t"
#endif
        "mov	r8, r5\n\t"
        "movs	r7, #0\n\t"
        "\n"
    "L_sp_2048_cond_sub_64_words_%=:\n\t"
        /* r6 = b[i] & m */
        "ldr	r6, [%[b], r7]\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, %[m]\n\t"
#elif defined(__clang__)
        "ands	r6, %[m]\n\t"
#else
        "and	r6, %[m]\n\t"
#endif
        /* 0 - r4 is computed only for its effect on the carry flag: it
         * restores the borrow saved in r4 from the previous word. */
        "movs	r5, #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r5, r5, r4\n\t"
#else
        "sub	r5, r5, r4\n\t"
#endif
        /* r5 = a[i] - (b[i] & m) - borrow */
        "ldr	r5, [%[a], r7]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        /* Save the new borrow in r4 as 0 or -1. */
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r4\n\t"
#elif defined(__clang__)
        "sbcs	r4, r4\n\t"
#else
        "sbc	r4, r4\n\t"
#endif
        "str	r5, [%[r], r7]\n\t"
        /* Advance to the next word; loop until all 64 are done. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r7, r7, #4\n\t"
#else
        "add	r7, r7, #4\n\t"
#endif
        "cmp	r7, r8\n\t"
        "blt	L_sp_2048_cond_sub_64_words_%=\n\t"
        /* The borrow is handed back through the register that held r. */
        "movs	%[r], r4\n\t"
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m)
        :
        : "memory", "r4", "r5", "r6", "r7", "r8", "cc"
    );
    return (word32)(size_t)r;
}

#define sp_2048_mont_reduce_order_64   sp_2048_mont_reduce_64
/* Reduce the number back to 2048 bits using Montgomery reduction.
 *
 * a   A single precision number to reduce in place.
 * m   The single precision number representing the modulus.
 * mp  The digit representing the negative inverse of m mod 2^n.
 */
SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m,
        sp_digit mp)
{
    __asm__ __volatile__ (
        "movs	r7, #0\n\t"
        "mov	r8, %[mp]\n\t"
        "mov	r12, r7\n\t"
        "mov	lr, %[m]\n\t"
        "mov	r9, %[a]\n\t"
        "mov	r11, %[a]\n\t"
        "movs	r5, #0xfc\n\t"
        "movs	r6, #0xff\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r6, r6, #1\n\t"
#else
        "add	r6, r6, #1\n\t"
#endif
        "add	r9, r9, r5\n\t"
        "add	r11, r11, r6\n\t"
        "\n"
    "L_sp_2048_mont_reduce_64_mod_%=:\n\t"
        "movs	r7, #0\n\t"
        "movs	r4, #0\n\t"
        "# a[i] += m[0] * mu\n\t"
        "ldm	%[m]!, {%[mp]}\n\t"
        "ldm	%[a]!, {r3}\n\t"
        "# mu = a[i] * mp\n\t"
        "mov	r5, r8\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r3, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r3\n\t"
#else
        "mul	r5, r3\n\t"
#endif
        "mov	r10, r5\n\t"
        "# Multiply m[0] and mu - Start\n\t"
        "mov	r5, r10\n\t"
        "uxth	r6, %[mp]\n\t"
        "uxth	r5, r5\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[mp], #16\n\t"
#else
        "lsr	r6, %[mp], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "mov	r5, r10\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[mp], #16\n\t"
#else
        "lsr	r6, %[mp], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, r5, #16\n\t"
#else
        "lsr	r5, r5, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
        "uxth	r6, %[mp]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "# Multiply m[0] and mu - Done\n\t"
        "\n"
    "L_sp_2048_mont_reduce_64_word_%=:\n\t"
        "# a[i+j] += m[j] * mu\n\t"
        "ldr	r3, [%[a]]\n\t"
        "ldm	%[m]!, {%[mp]}\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r4\n\t"
#else
        "add	r3, r3, r4\n\t"
#endif
        "movs	r4, #0\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
        "# Multiply m[j] and mu - Start\n\t"
        "mov	r5, r10\n\t"
        "uxth	r6, %[mp]\n\t"
        "uxth	r5, r5\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[mp], #16\n\t"
#else
        "lsr	r6, %[mp], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "mov	r5, r10\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[mp], #16\n\t"
#else
        "lsr	r6, %[mp], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, r5, #16\n\t"
#else
        "lsr	r5, r5, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
        "uxth	r6, %[mp]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "adcs	r4, r6\n\t"
#else
        "adc	r4, r6\n\t"
#endif
        "# Multiply m[j] and mu - Done\n\t"
        "stm	%[a]!, {r3}\n\t"
        "cmp	%[a], r9\n\t"
        "blt	L_sp_2048_mont_reduce_64_word_%=\n\t"
        "# a[i+63] += m[63] * mu\n\t"
        "ldr	%[mp], [%[m]]\n\t"
        "mov	r3, r12\n\t"
        "# Multiply m[63] and mu - Start\n\t"
        "mov	r5, r10\n\t"
        "uxth	r6, %[mp]\n\t"
        "uxth	r5, r5\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r7, r7, r7\n\t"
#elif defined(__clang__)
        "adcs	r7, r7\n\t"
#else
        "adc	r7, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[mp], #16\n\t"
#else
        "lsr	r6, %[mp], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r7, r7, r7\n\t"
#elif defined(__clang__)
        "adcs	r7, r7\n\t"
#else
        "adc	r7, r7\n\t"
#endif
        "mov	r5, r10\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[mp], #16\n\t"
#else
        "lsr	r6, %[mp], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, r5, #16\n\t"
#else
        "lsr	r5, r5, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r5, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r5\n\t"
#else
        "mul	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r7, r7, r7\n\t"
#elif defined(__clang__)
        "adcs	r7, r7\n\t"
#else
        "adc	r7, r7\n\t"
#endif
        "uxth	r6, %[mp]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r5, r6, r5\n\t"
#elif defined(__clang__)
        "muls	r5, r6\n\t"
#else
        "mul	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #16\n\t"
#else
        "lsr	r6, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, #16\n\t"
#else
        "lsl	r5, r5, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r5\n\t"
#else
        "add	r4, r4, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "adcs	r3, r6\n\t"
#else
        "adc	r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r7, r7, r7\n\t"
#elif defined(__clang__)
        "adcs	r7, r7\n\t"
#else
        "adc	r7, r7\n\t"
#endif
        "# Multiply m[63] and mu - Done\n\t"
        "ldr	r5, [%[a]]\n\t"
        "ldr	r6, [%[a], #4]\n\t"
        "movs	%[mp], #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r4\n\t"
#else
        "add	r5, r5, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r6, r6, r3\n\t"
#elif defined(__clang__)
        "adcs	r6, r3\n\t"
#else
        "adc	r6, r3\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r7, r7, %[mp]\n\t"
#elif defined(__clang__)
        "adcs	r7, %[mp]\n\t"
#else
        "adc	r7, %[mp]\n\t"
#endif
        "stm	%[a]!, {r5, r6}\n\t"
        "# i += 1\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[a], %[a], #4\n\t"
#else
        "sub	%[a], %[a], #4\n\t"
#endif
        "movs	r3, #0xfc\n\t"
        "mov	r9, %[a]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[a], %[a], r3\n\t"
#else
        "sub	%[a], %[a], r3\n\t"
#endif
        "mov	r12, r7\n\t"
        "mov	%[m], lr\n\t"
        "cmp	r11, %[a]\n\t"
        "bgt	L_sp_2048_mont_reduce_64_mod_%=\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "negs	r7, r7\n\t"
#else
        "neg	r7, r7\n\t"
#endif
        "# Subtract masked modulus\n\t"
        "movs	r4, #0xff\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, #1\n\t"
#else
        "add	r4, r4, #1\n\t"
#endif
        "movs	%[mp], #0\n\t"
        "movs	r3, #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[a], %[a], r4\n\t"
#else
        "sub	%[a], %[a], r4\n\t"
#endif
#ifndef WOLFSSL_SP_LARGE_CODE
        "\n"
    "L_sp_2048_mont_reduce_64_sub_mask_%=:\n\t"
        "ldm	%[m]!, {r6}\n\t"
        "movs	r5, #0\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r5, r5, %[mp]\n\t"
#else
        "sub	r5, r5, %[mp]\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	%[mp], %[mp], %[mp]\n\t"
#elif defined(__clang__)
        "sbcs	%[mp], %[mp]\n\t"
#else
        "sbc	%[mp], %[mp]\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, #4\n\t"
#else
        "add	r3, r3, #4\n\t"
#endif
        "cmp	r3, r4\n\t"
        "blt	L_sp_2048_mont_reduce_64_sub_mask_%=\n\t"
#else /* WOLFSSL_SP_LARGE_CODE */
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r5, r5, r6\n\t"
#else
        "sub	r5, r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
        "ldm	%[m]!, {r6}\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r7\n\t"
#elif defined(__clang__)
        "ands	r6, r7\n\t"
#else
        "and	r6, r7\n\t"
#endif
        "ldr	r5, [%[a], r4]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "sbcs	r5, r6\n\t"
#else
        "sbc	r5, r6\n\t"
#endif
        "stm	%[a]!, {r5}\n\t"
#endif /* WOLFSSL_SP_LARGE_CODE */
        : [a] "+l" (a), [m] "+l" (m), [mp] "+l" (mp)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

/* Multiply two Montgomery form numbers mod the modulus (prime).
 * (r = a * b mod m)
 *
 * Done in two steps: sp_2048_mul_64() multiplies a by b into r, then
 * sp_2048_mont_reduce_64() reduces r in place using m and mp.
 * NOTE(review): r presumably has to be large enough for the unreduced
 * product written by sp_2048_mul_64() - confirm against that function.
 *
 * r   Result of multiplication.
 * a   First number to multiply in Montgomery form.
 * b   Second number to multiply in Montgomery form.
 * m   Modulus (prime).
 * mp  Montgomery multiplier.
 */
SP_NOINLINE static void sp_2048_mont_mul_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b, const sp_digit* m, sp_digit mp)
{
    /* Step 1: multiply a by b, result goes to r. */
    sp_2048_mul_64(r, a, b);
    /* Step 2: Montgomery reduce r (only r is passed, so it is updated in
     * place). */
    sp_2048_mont_reduce_64(r, m, mp);
}

/* Square the Montgomery form number. (r = a * a mod m)
 *
 * Done in two steps: sp_2048_sqr_64() squares a into r, then
 * sp_2048_mont_reduce_64() reduces r in place using m and mp.
 * NOTE(review): r presumably has to be large enough for the unreduced
 * square written by sp_2048_sqr_64() - confirm against that function.
 *
 * r   Result of squaring.
 * a   Number to square in Montgomery form.
 * m   Modulus (prime).
 * mp  Montgomery multiplier.
 */
SP_NOINLINE static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a,
        const sp_digit* m, sp_digit mp)
{
    /* Step 1: square a, result goes to r. */
    sp_2048_sqr_64(r, a);
    /* Step 2: Montgomery reduce r (only r is passed, so it is updated in
     * place). */
    sp_2048_mont_reduce_64(r, m, mp);
}

#ifdef WOLFSSL_SP_SMALL
/* Sub b from a into r. (r = a - b)
 *
 * Small-code variant: one word is subtracted per loop iteration. The loop
 * ends once a has advanced 256 bytes in 4-byte steps, i.e. after 64 words.
 * The borrow is kept in r3 between iterations because the loop compare
 * overwrites the condition flags; it is turned back into the carry flag at
 * the top of each iteration.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 *
 * Returns the borrow mask left in r3 after the last word: 0 when there was
 * no borrow, all bits set when there was.
 */
SP_NOINLINE static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    __asm__ __volatile__ (
        /* r6 = a + 256: the address at which the loop stops.
         * r3 = 0: no borrow going into the first word.
         * 256 is built in r5 as 0xff + 1. */
        "movs	r6, %[a]\n\t"
        "movs	r3, #0\n\t"
        "movs	r5, #0xff\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, #1\n\t"
#else
        "add	r5, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r6, r6, r5\n\t"
#else
        "add	r6, r6, r5\n\t"
#endif
        "\n"
    "L_sp_2048_sub_64_word_%=:\n\t"
        /* Restore the carry flag from r3 by computing 0 - r3:
         * r3 == 0 (no borrow) leaves carry set, r3 == all ones (borrow)
         * leaves carry clear. The value in r5 is discarded. */
        "movs	r5, #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r5, r5, r3\n\t"
#else
        "sub	r5, r5, r3\n\t"
#endif
        /* *r = *a - *b - borrow */
        "ldr	r4, [%[a]]\n\t"
        "ldr	r5, [%[b]]\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r5\n\t"
#elif defined(__clang__)
        "sbcs	r4, r5\n\t"
#else
        "sbc	r4, r5\n\t"
#endif
        "str	r4, [%[r]]\n\t"
        /* Save the new borrow: r3 = r3 - r3 - borrow, giving 0 or all
         * ones. */
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r3\n\t"
#elif defined(__clang__)
        "sbcs	r3, r3\n\t"
#else
        "sbc	r3, r3\n\t"
#endif
        /* Advance a, b and r by one 4-byte word. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[a], %[a], #4\n\t"
#else
        "add	%[a], %[a], #4\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[b], %[b], #4\n\t"
#else
        "add	%[b], %[b], #4\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[r], %[r], #4\n\t"
#else
        "add	%[r], %[r], #4\n\t"
#endif
        /* Loop until a reaches the end address held in r6. */
        "cmp	%[a], r6\n\t"
        "bne	L_sp_2048_sub_64_word_%=\n\t"
        /* Pass the final borrow mask out through the r operand. */
        "movs	%[r], r3\n\t"
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "cc"
    );
    /* r no longer holds a pointer here: it carries the borrow mask from r3. */
    return (word32)(size_t)r;
}

#else
/* Sub b from a into r. (r = a - b)
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    __asm__ __volatile__ (
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r3, r3, r5\n\t"
#else
        "sub	r3, r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "ldm	%[a]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	r3, r3, r5\n\t"
#elif defined(__clang__)
        "sbcs	r3, r5\n\t"
#else
        "sbc	r3, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "sbcs	r4, r6\n\t"
#else
        "sbc	r4, r6\n\t"
#endif
        "stm	%[r]!, {r3, r4}\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	%[r], %[r], %[r]\n\t"
#elif defined(__clang__)
        "sbcs	%[r], %[r]\n\t"
#else
        "sbc	%[r], %[r]\n\t"
#endif
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "cc"
    );
    return (word32)(size_t)r;
}

#endif /* WOLFSSL_SP_SMALL */
/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
 *
 * d1   The high order half of the number to divide.
 * d0   The low order half of the number to divide.
 * div  The divisor.
 * returns the result of the division.
 *
 * Note that this is an approximate div. It may give an answer 1 larger.
 *
 * Outline of the assembly:
 *   1. r3 collects an initial quotient estimate one bit per step (one step
 *      before the loop plus 30 loop iterations). Each step compares the
 *      running high word with r5 = (div >> 1) + 1 and uses the borrow,
 *      turned into an all-ones/zero mask, to update r3 and the high word.
 *      The only branch is the one on the loop counter.
 *   2. The estimate is then set to 2 * r3 + 1.
 *   3. Four correction passes each form the low 64 bits of r3 * div in r5:r4
 *      from 16-bit halves, subtract that from the saved (d1|d0) held in
 *      r9:r8 and add the high word of the difference to r3.
 *   4. r3 is incremented once more when the low word of the last difference
 *      is greater than div.
 */
SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0,
        sp_digit div)
{
    __asm__ __volatile__ (
        /* r3 = quotient estimate, r5 = (div >> 1) + 1. */
        "movs	r3, #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[div], #1\n\t"
#else
        "lsr	r5, %[div], #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, #1\n\t"
#else
        "add	r5, r5, #1\n\t"
#endif
        /* Keep the original operands in r8/r9 for the correction passes. */
        "mov	r8, %[d0]\n\t"
        "mov	r9, %[d1]\n\t"
        "# Do top 32\n\t"
        /* r6 = all ones when d1 > r5 (borrow from r5 - d1), otherwise 0. */
        "movs	r6, r5\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r6, r6, %[d1]\n\t"
#else
        "sub	r6, r6, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r6\n\t"
#elif defined(__clang__)
        "sbcs	r6, r6\n\t"
#else
        "sbc	r6, r6\n\t"
#endif
        /* r3 = 2 * r3 - r6, then d1 -= r5 & r6. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r3\n\t"
#else
        "add	r3, r3, r3\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r3, r3, r6\n\t"
#else
        "sub	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r5\n\t"
#elif defined(__clang__)
        "ands	r6, r5\n\t"
#else
        "and	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[d1], %[d1], r6\n\t"
#else
        "sub	%[d1], %[d1], r6\n\t"
#endif
        /* Loop counter: the body below runs for r4 = 29 down to 0. */
        "movs	r4, #29\n\t"
        "\n"
    "L_div_2048_word_64_loop_%=:\n\t"
        /* Shift (d1|d0) left by one bit, then repeat the step above. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d0], %[d0], #1\n\t"
#else
        "lsl	%[d0], %[d0], #1\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	%[d1], %[d1], %[d1]\n\t"
#elif defined(__clang__)
        "adcs	%[d1], %[d1]\n\t"
#else
        "adc	%[d1], %[d1]\n\t"
#endif
        "movs	r6, r5\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r6, r6, %[d1]\n\t"
#else
        "sub	r6, r6, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r6\n\t"
#elif defined(__clang__)
        "sbcs	r6, r6\n\t"
#else
        "sbc	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r3\n\t"
#else
        "add	r3, r3, r3\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r3, r3, r6\n\t"
#else
        "sub	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, r5\n\t"
#elif defined(__clang__)
        "ands	r6, r5\n\t"
#else
        "and	r6, r5\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[d1], %[d1], r6\n\t"
#else
        "sub	%[d1], %[d1], r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r4, r4, #1\n\t"
#else
        "sub	r4, r4, #1\n\t"
#endif
        "bpl	L_div_2048_word_64_loop_%=\n\t"
        /* r7 = 0 is used as the zero addend when propagating carries. */
        "movs	r7, #0\n\t"
        /* r3 = 2 * r3 + 1. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r3\n\t"
#else
        "add	r3, r3, r3\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, #1\n\t"
#else
        "add	r3, r3, #1\n\t"
#endif
        /* Correction pass 1: r5:r4 = r3 * div built from 16-bit halves. */
        "# r * div - Start\n\t"
        "uxth	%[d1], r3\n\t"
        "uxth	r4, %[div]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r4, %[d1], r4\n\t"
#elif defined(__clang__)
        "muls	r4, %[d1]\n\t"
#else
        "mul	r4, %[d1]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[div], #16\n\t"
#else
        "lsr	r6, %[div], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	%[d1], r6, %[d1]\n\t"
#elif defined(__clang__)
        "muls	%[d1], r6\n\t"
#else
        "mul	%[d1], r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[d1], #16\n\t"
#else
        "lsr	r5, %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d1], %[d1], #16\n\t"
#else
        "lsl	%[d1], %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, %[d1]\n\t"
#else
        "add	r4, r4, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	%[d1], r3, #16\n\t"
#else
        "lsr	%[d1], r3, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, %[d1], r6\n\t"
#elif defined(__clang__)
        "muls	r6, %[d1]\n\t"
#else
        "mul	r6, %[d1]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
        "uxth	r6, %[div]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	%[d1], r6, %[d1]\n\t"
#elif defined(__clang__)
        "muls	%[d1], r6\n\t"
#else
        "mul	%[d1], r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[d1], #16\n\t"
#else
        "lsr	r6, %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d1], %[d1], #16\n\t"
#else
        "lsl	%[d1], %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, %[d1]\n\t"
#else
        "add	r4, r4, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "adcs	r5, r6\n\t"
#else
        "adc	r5, r6\n\t"
#endif
        "# r * div - Done\n\t"
        /* (r9:r8) - (r5:r4); add the high word of the difference to r3. */
        "mov	%[d1], r8\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[d1], %[d1], r4\n\t"
#else
        "sub	%[d1], %[d1], r4\n\t"
#endif
        "movs	r4, %[d1]\n\t"
        "mov	%[d1], r9\n\t"
#ifdef WOLFSSL_KEIL
        "sbcs	%[d1], %[d1], r5\n\t"
#elif defined(__clang__)
        "sbcs	%[d1], r5\n\t"
#else
        "sbc	%[d1], r5\n\t"
#endif
        "movs	r5, %[d1]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
        /* Correction pass 2. */
        "# r * div - Start\n\t"
        "uxth	%[d1], r3\n\t"
        "uxth	r4, %[div]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r4, %[d1], r4\n\t"
#elif defined(__clang__)
        "muls	r4, %[d1]\n\t"
#else
        "mul	r4, %[d1]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[div], #16\n\t"
#else
        "lsr	r6, %[div], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	%[d1], r6, %[d1]\n\t"
#elif defined(__clang__)
        "muls	%[d1], r6\n\t"
#else
        "mul	%[d1], r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[d1], #16\n\t"
#else
        "lsr	r5, %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d1], %[d1], #16\n\t"
#else
        "lsl	%[d1], %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, %[d1]\n\t"
#else
        "add	r4, r4, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	%[d1], r3, #16\n\t"
#else
        "lsr	%[d1], r3, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, %[d1], r6\n\t"
#elif defined(__clang__)
        "muls	r6, %[d1]\n\t"
#else
        "mul	r6, %[d1]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
        "uxth	r6, %[div]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	%[d1], r6, %[d1]\n\t"
#elif defined(__clang__)
        "muls	%[d1], r6\n\t"
#else
        "mul	%[d1], r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[d1], #16\n\t"
#else
        "lsr	r6, %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d1], %[d1], #16\n\t"
#else
        "lsl	%[d1], %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, %[d1]\n\t"
#else
        "add	r4, r4, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "adcs	r5, r6\n\t"
#else
        "adc	r5, r6\n\t"
#endif
        "# r * div - Done\n\t"
        "mov	%[d1], r8\n\t"
        "mov	r6, r9\n\t"
#ifdef WOLFSSL_KEIL
        "subs	r4, %[d1], r4\n\t"
#else
#ifdef __clang__
        "subs	r4, %[d1], r4\n\t"
#else
        "sub	r4, %[d1], r4\n\t"
#endif
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r5\n\t"
#elif defined(__clang__)
        "sbcs	r6, r5\n\t"
#else
        "sbc	r6, r5\n\t"
#endif
        "movs	r5, r6\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
        /* Correction pass 3. */
        "# r * div - Start\n\t"
        "uxth	%[d1], r3\n\t"
        "uxth	r4, %[div]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r4, %[d1], r4\n\t"
#elif defined(__clang__)
        "muls	r4, %[d1]\n\t"
#else
        "mul	r4, %[d1]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[div], #16\n\t"
#else
        "lsr	r6, %[div], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	%[d1], r6, %[d1]\n\t"
#elif defined(__clang__)
        "muls	%[d1], r6\n\t"
#else
        "mul	%[d1], r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[d1], #16\n\t"
#else
        "lsr	r5, %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d1], %[d1], #16\n\t"
#else
        "lsl	%[d1], %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, %[d1]\n\t"
#else
        "add	r4, r4, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	%[d1], r3, #16\n\t"
#else
        "lsr	%[d1], r3, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, %[d1], r6\n\t"
#elif defined(__clang__)
        "muls	r6, %[d1]\n\t"
#else
        "mul	r6, %[d1]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
        "uxth	r6, %[div]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	%[d1], r6, %[d1]\n\t"
#elif defined(__clang__)
        "muls	%[d1], r6\n\t"
#else
        "mul	%[d1], r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[d1], #16\n\t"
#else
        "lsr	r6, %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d1], %[d1], #16\n\t"
#else
        "lsl	%[d1], %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, %[d1]\n\t"
#else
        "add	r4, r4, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "adcs	r5, r6\n\t"
#else
        "adc	r5, r6\n\t"
#endif
        "# r * div - Done\n\t"
        "mov	%[d1], r8\n\t"
        "mov	r6, r9\n\t"
#ifdef WOLFSSL_KEIL
        "subs	r4, %[d1], r4\n\t"
#else
#ifdef __clang__
        "subs	r4, %[d1], r4\n\t"
#else
        "sub	r4, %[d1], r4\n\t"
#endif
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r5\n\t"
#elif defined(__clang__)
        "sbcs	r6, r5\n\t"
#else
        "sbc	r6, r5\n\t"
#endif
        "movs	r5, r6\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
        /* Correction pass 4. */
        "# r * div - Start\n\t"
        "uxth	%[d1], r3\n\t"
        "uxth	r4, %[div]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r4, %[d1], r4\n\t"
#elif defined(__clang__)
        "muls	r4, %[d1]\n\t"
#else
        "mul	r4, %[d1]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[div], #16\n\t"
#else
        "lsr	r6, %[div], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	%[d1], r6, %[d1]\n\t"
#elif defined(__clang__)
        "muls	%[d1], r6\n\t"
#else
        "mul	%[d1], r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, %[d1], #16\n\t"
#else
        "lsr	r5, %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d1], %[d1], #16\n\t"
#else
        "lsl	%[d1], %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, %[d1]\n\t"
#else
        "add	r4, r4, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	%[d1], r3, #16\n\t"
#else
        "lsr	%[d1], r3, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, %[d1], r6\n\t"
#elif defined(__clang__)
        "muls	r6, %[d1]\n\t"
#else
        "mul	r6, %[d1]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
        "uxth	r6, %[div]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	%[d1], r6, %[d1]\n\t"
#elif defined(__clang__)
        "muls	%[d1], r6\n\t"
#else
        "mul	%[d1], r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[d1], #16\n\t"
#else
        "lsr	r6, %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	%[d1], %[d1], #16\n\t"
#else
        "lsl	%[d1], %[d1], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, %[d1]\n\t"
#else
        "add	r4, r4, %[d1]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "adcs	r5, r6\n\t"
#else
        "adc	r5, r6\n\t"
#endif
        "# r * div - Done\n\t"
        "mov	%[d1], r8\n\t"
        "mov	r6, r9\n\t"
#ifdef WOLFSSL_KEIL
        "subs	r4, %[d1], r4\n\t"
#else
#ifdef __clang__
        "subs	r4, %[d1], r4\n\t"
#else
        "sub	r4, %[d1], r4\n\t"
#endif
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r5\n\t"
#elif defined(__clang__)
        "sbcs	r6, r5\n\t"
#else
        "sbc	r6, r5\n\t"
#endif
        "movs	r5, r6\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r5\n\t"
#else
        "add	r3, r3, r5\n\t"
#endif
        /* Final fix-up: r6 = all ones when r4 > div, so r3 gains one more. */
        "movs	r6, %[div]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r6, r6, r4\n\t"
#else
        "sub	r6, r6, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r6\n\t"
#elif defined(__clang__)
        "sbcs	r6, r6\n\t"
#else
        "sbc	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r3, r3, r6\n\t"
#else
        "sub	r3, r3, r6\n\t"
#endif
        /* The result leaves the asm block through the d1 operand. */
        "movs	%[d1], r3\n\t"
        : [d1] "+l" (d1), [d0] "+l" (d0), [div] "+l" (div)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
    );
    return (word32)(size_t)d1;
}

/* Long division of a 128-word number by a 64-word divisor, keeping only the
 * remainder (m*d + r = a). Uses data-dependent branches.
 *
 * a  Dividend (128 words are read).
 * d  Divisor (64 words).
 * m  Quotient output - never written; the quotient is not needed.
 * r  Receives the 64-word remainder.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d,
        sp_digit* m, sp_digit* r)
{
    sp_digit rem[128];
    sp_digit prod[65];
    sp_digit top = d[63];
    sp_digit q;
    int k;

    (void)m;

    /* Work on a private copy of the dividend. */
    XMEMCPY(rem, a, sizeof(*rem) * 2 * 64);

    /* Locate the most significant word where the upper half differs from d
     * and, if the upper half is not smaller, take d off it once. */
    k = 63;
    while ((k > 0) && (rem[k + 64] == d[k])) {
        k--;
    }
    if (rem[k + 64] >= d[k]) {
        sp_2048_sub_in_place_64(&rem[64], d);
    }

    for (k = 63; k >= 0; k--) {
        /* win[0..63] is the current window, win[64] the word above it. */
        sp_digit* win = &rem[k];

        /* Estimate the quotient word; saturate when the top words match. */
        if (win[64] != top) {
            q = div_2048_word_64(win[64], win[63], top);
        }
        else {
            q = SP_DIGIT_MAX;
        }

        /* Remove q * d from the window. */
        sp_2048_mul_d_64(prod, d, q);
        win[64] += sp_2048_sub_in_place_64(win, prod);
        win[64] -= prod[64];

        /* Add d back, at most twice, while the top word is non-zero. */
        if (win[64] == 0) {
            continue;
        }
        win[64] += sp_2048_add_64(win, win, d);
        if (win[64] != 0) {
            win[64] += sp_2048_add_64(win, win, d);
        }
    }

    /* Final step: subtract d once more if the low half is not below it. */
    k = 63;
    while ((k > 0) && (rem[k] == d[k])) {
        k--;
    }
    if (rem[k] < d[k]) {
        XMEMCPY(r, rem, sizeof(*rem) * 64);
    }
    else {
        sp_2048_sub_64(r, rem, d);
    }

    return MP_OKAY;
}

/* Compute r = a mod m by delegating to sp_2048_div_64_cond.
 *
 * r  Receives the reduced value.
 * a  Value to reduce.
 * m  Modulus.
 * returns the result of sp_2048_div_64_cond (MP_OKAY on success).
 */
static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
    /* Only the remainder is wanted, so no quotient buffer is passed. */
    return sp_2048_div_64_cond(a, m, NULL, r);
}

#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
/* Apply the mask m to all 64 words of a, writing the result to r.
 *
 * r  Destination, 64 words.
 * a  Source, 64 words.
 * m  Value ANDed with every source word.
 */
static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
    int idx = 0;

    /* Compact form: one word per iteration. */
    while (idx < 64) {
        r[idx] = a[idx] & m;
        idx++;
    }
#else
    const sp_digit* src = a;
    const sp_digit* end = a + 64;
    sp_digit* dst = r;

    /* Unrolled form: eight words per iteration. */
    while (src != end) {
        dst[0] = src[0] & m;
        dst[1] = src[1] & m;
        dst[2] = src[2] & m;
        dst[3] = src[3] & m;
        dst[4] = src[4] & m;
        dst[5] = src[5] & m;
        dst[6] = src[6] & m;
        dst[7] = src[7] & m;
        src += 8;
        dst += 8;
    }
#endif
}

/* Compare a with b in constant time.
 *
 * a  A single precision integer.
 * b  A single precision integer.
 * return -ve, 0 or +ve if a is less than, equal to or greater than b
 * respectively.
 *
 * Every word pair is loaded, from byte offset 0xfc down to 0 in steps of 4.
 * r2 holds the running result and r3 is a mask that starts as all ones and
 * is cleared at the first word pair that differs, so that lower words are
 * both masked to zero and no longer change r2.
 */
SP_NOINLINE static sp_int32 sp_2048_cmp_64(const sp_digit* a, const sp_digit* b)
{
    __asm__ __volatile__ (
        /* r2 = result (0), r3 = mask (all ones), r6 = byte offset. */
        "movs	r2, #0\n\t"
        "movs	r3, #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "mvns	r3, r3\n\t"
#else
        "mvn	r3, r3\n\t"
#endif
        "movs	r6, #0xfc\n\t"
        "\n"
    "L_sp_2048_cmp_64_words_%=:\n\t"
        /* Load the word pair and apply the mask to both. */
        "ldr	r7, [%[a], r6]\n\t"
        "ldr	r5, [%[b], r6]\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r7, r7, r3\n\t"
#elif defined(__clang__)
        "ands	r7, r3\n\t"
#else
        "and	r7, r3\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "ands	r5, r5, r3\n\t"
#elif defined(__clang__)
        "ands	r5, r3\n\t"
#else
        "and	r5, r3\n\t"
#endif
        /* r7 = all ones when a-word < b-word (borrow), else 0; add to r2. */
        "movs	r4, r7\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r7, r7, r5\n\t"
#else
        "sub	r7, r7, r5\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r7, r7, r7\n\t"
#elif defined(__clang__)
        "sbcs	r7, r7\n\t"
#else
        "sbc	r7, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r2, r2, r7\n\t"
#else
        "add	r2, r2, r7\n\t"
#endif
        /* Clear the mask when a-word < b-word. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "mvns	r7, r7\n\t"
#else
        "mvn	r7, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "ands	r3, r3, r7\n\t"
#elif defined(__clang__)
        "ands	r3, r7\n\t"
#else
        "and	r3, r7\n\t"
#endif
        /* r7 = all ones when b-word < a-word, else 0; subtract from r2. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r5, r5, r4\n\t"
#else
        "sub	r5, r5, r4\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r7, r7, r7\n\t"
#elif defined(__clang__)
        "sbcs	r7, r7\n\t"
#else
        "sbc	r7, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r2, r2, r7\n\t"
#else
        "sub	r2, r2, r7\n\t"
#endif
        /* Clear the mask when b-word < a-word. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "mvns	r7, r7\n\t"
#else
        "mvn	r7, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "ands	r3, r3, r7\n\t"
#elif defined(__clang__)
        "ands	r3, r7\n\t"
#else
        "and	r3, r7\n\t"
#endif
        /* Step down to the next lower word; stop once the offset is negative. */
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r6, r6, #4\n\t"
#else
        "sub	r6, r6, #4\n\t"
#endif
        "bge	L_sp_2048_cmp_64_words_%=\n\t"
        /* The result leaves the asm block through the a operand. */
        "movs	%[a], r2\n\t"
        : [a] "+l" (a), [b] "+l" (b)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc"
    );
    return (word32)(size_t)a;
}

/* Long division of a 128-word number by a 64-word divisor, keeping only the
 * remainder (m*d + r = a). Corrections are applied with masks rather than
 * data-dependent branches.
 *
 * a  Dividend (128 words are read).
 * d  Divisor (64 words).
 * m  Quotient output - never written; the quotient is not needed.
 * r  Receives the 64-word remainder.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d,
        sp_digit* m, sp_digit* r)
{
    sp_digit rem[128];
    sp_digit scratch[65];
    sp_digit top;
    sp_digit q;
    int k;

    (void)m;

    top = d[63];
    /* Work on a private copy of the dividend. */
    XMEMCPY(rem, a, sizeof(*rem) * 2 * 64);

    /* Take d off the upper half once when the upper half is not smaller. */
    q = sp_2048_cmp_64(&rem[64], d) >= 0;
    sp_2048_cond_sub_64(&rem[64], &rem[64], d, (sp_digit)0 - q);

    k = 64;
    while (k-- > 0) {
        /* win[0..63] is the current window, win[64] the word above it. */
        sp_digit* win = &rem[k];
        /* All ones when the top word equals the divisor's top word. */
        volatile sp_digit mask = (sp_digit)0 - (win[64] == top);
        sp_digit hi = win[64] + mask;

        /* Estimate the quotient word; the mask forces it to all ones. */
        q = div_2048_word_64(hi, win[63], top);
        q |= mask;

        /* Remove q * d from the window. */
        sp_2048_mul_d_64(scratch, d, q);
        win[64] += sp_2048_sub_in_place_64(win, scratch);
        win[64] -= scratch[64];

        /* Two masked add-backs of d, each selected by the top word. */
        sp_2048_mask_64(scratch, d, win[64]);
        win[64] += sp_2048_add_64(win, win, scratch);
        sp_2048_mask_64(scratch, d, win[64]);
        win[64] += sp_2048_add_64(win, win, scratch);
    }

    /* Final conditional subtraction writes the remainder to r. */
    q = sp_2048_cmp_64(rem, d) >= 0;
    sp_2048_cond_sub_64(r, rem, d, (sp_digit)0 - q);

    return MP_OKAY;
}

/* Compute r = a mod m by delegating to sp_2048_div_64.
 *
 * r  Receives the reduced value.
 * a  Value to reduce.
 * m  Modulus.
 * returns the result of sp_2048_div_64 (MP_OKAY on success).
 */
static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
    /* Only the remainder is wanted, so no quotient buffer is passed. */
    return sp_2048_div_64(a, m, NULL, r);
}

#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
                                                     defined(WOLFSSL_HAVE_SP_DH)
#ifdef WOLFSSL_SP_SMALL
/* Modular exponentiate a to the e mod m. (r = a^e mod m)
 *
 * Fixed 3-bit window: powers 1..7 of the base are precomputed in Montgomery
 * form, then the exponent is consumed three bits at a time from the top.
 *
 * r        A single precision number that is the result of the operation.
 * a        A single precision number being exponentiated.
 * e        A single precision number that is the exponent.
 * bits     The number of bits in the exponent.
 * m        A single precision number that is the modulus.
 * reduceA  When non-zero, a is reduced modulo m before being converted to
 *          Montgomery form.
 * returns  0 on success.
 * returns  MEMORY_E on dynamic memory allocation failure.
 * returns  MP_VAL when bits is 0.
 */
static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
        int bits, const sp_digit* m, int reduceA)
{
    SP_DECL_VAR(sp_digit, td, 8 * 128);
    sp_digit* t[8];
    sp_digit* norm = NULL;
    sp_digit mp = 1;
    sp_digit n;
    sp_digit mask;
    int i;
    int c;
    byte y;
    int err = MP_OKAY;

    if (bits == 0) {
        err = MP_VAL;
    }

    SP_ALLOC_VAR(sp_digit, td, 8 * 128, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    if (err == MP_OKAY) {
        /* t[0] shares storage with norm. */
        norm = td;
        for (i=0; i<8; i++) {
            t[i] = td + i * 128;
        }

        sp_2048_mont_setup(m, &mp);
        sp_2048_mont_norm_64(norm, m);

        /* t[1] = (a << 2048) mod m: a is placed in the upper 64 words above
         * a zeroed lower half and the 128-word value is reduced. */
        XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
        if (reduceA != 0) {
            err = sp_2048_mod_64(t[1] + 64, a, m);
            if (err == MP_OKAY) {
                err = sp_2048_mod_64(t[1], t[1], m);
            }
        }
        else {
            XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
            err = sp_2048_mod_64(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        /* Precompute t[k] = base^k for k = 2..7. */
        sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
        sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
        sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
        sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
        sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
        sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);

        /* Take the leading (bits % 3) bits so the rest splits into whole
         * 3-bit windows. c tracks unread bits left in the current word n. */
        i = (bits - 1) / 32;
        n = e[i--];
        c = bits & 31;
        if (c == 0) {
            c = 32;
        }
        c -= bits % 3;
        if (c == 32) {
            c = 29;
        }
        if (c < 0) {
            /* Number of bits in top word is less than number needed. */
            c = -c;
            y = (byte)(n << c);
            n = e[i--];
            /* Digits are 32 bits wide: borrow the top c bits of the next
             * word. Shifting by (64 - c) would exceed the type width. */
            y |= (byte)(n >> (32 - c));
            n <<= c;
            c = 32 - c;
        }
        else if (c == 0) {
            /* All bits in top word used. */
            y = (byte)n;
        }
        else {
            y = (byte)(n >> c);
            n <<= 32 - c;
        }
        XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
        for (; i>=0 || c>=3; ) {
            if (c == 0) {
                /* Current word exhausted: start on the next one. */
                n = e[i--];
                y = (byte)(n >> 29);
                n <<= 3;
                c = 29;
            }
            else if (c < 3) {
                /* Window straddles two exponent words. */
                y = (byte)(n >> 29);
                n = e[i--];
                c = 3 - c;
                y |= (byte)(n >> (32 - c));
                n <<= c;
                c = 32 - c;
            }
            else {
                y = (byte)((n >> 29) & 0x7);
                n <<= 3;
                c -= 3;
            }

            /* r = r^8 * base^y. */
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);

            sp_2048_mont_mul_64(r, r, t[y], m, mp);
        }

        /* Convert out of Montgomery form. */
        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
        sp_2048_mont_reduce_64(r, m, mp);

        /* Final masked subtraction of m when r >= m. */
        mask = (sp_digit)0 - (sp_2048_cmp_64(r, m) >= 0);
        sp_2048_cond_sub_64(r, r, m, mask);
    }

    SP_FREE_VAR(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);

    return err;
}
#else
/* Modular exponentiate a to the e mod m. (r = a^e mod m)
 *
 * Fixed 4-bit window: powers 1..15 of the base are precomputed in Montgomery
 * form, then the exponent is consumed four bits at a time from the top.
 *
 * r        A single precision number that is the result of the operation.
 * a        A single precision number being exponentiated.
 * e        A single precision number that is the exponent.
 * bits     The number of bits in the exponent.
 * m        A single precision number that is the modulus.
 * reduceA  When non-zero, a is reduced modulo m before being converted to
 *          Montgomery form.
 * returns  0 on success.
 * returns  MEMORY_E on dynamic memory allocation failure.
 * returns  MP_VAL when bits is 0.
 */
static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
        int bits, const sp_digit* m, int reduceA)
{
    SP_DECL_VAR(sp_digit, td, 16 * 128);
    sp_digit* t[16];
    sp_digit* norm = NULL;
    sp_digit mp = 1;
    sp_digit n;
    sp_digit mask;
    int i;
    int c;
    byte y;
    int err = MP_OKAY;

    if (bits == 0) {
        err = MP_VAL;
    }

    SP_ALLOC_VAR(sp_digit, td, 16 * 128, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    if (err == MP_OKAY) {
        /* t[0] shares storage with norm. */
        norm = td;
        for (i=0; i<16; i++) {
            t[i] = td + i * 128;
        }

        sp_2048_mont_setup(m, &mp);
        sp_2048_mont_norm_64(norm, m);

        /* t[1] = (a << 2048) mod m: a is placed in the upper 64 words above
         * a zeroed lower half and the 128-word value is reduced. */
        XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
        if (reduceA != 0) {
            err = sp_2048_mod_64(t[1] + 64, a, m);
            if (err == MP_OKAY) {
                err = sp_2048_mod_64(t[1], t[1], m);
            }
        }
        else {
            XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
            err = sp_2048_mod_64(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        /* Precompute t[k] = base^k for k = 2..15. */
        sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
        sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
        sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
        sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
        sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
        sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
        sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
        sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
        sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
        sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
        sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
        sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
        sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
        sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);

        /* Take the leading (bits % 4) bits so the rest splits into whole
         * 4-bit windows. c tracks unread bits left in the current word n. */
        i = (bits - 1) / 32;
        n = e[i--];
        c = bits & 31;
        if (c == 0) {
            c = 32;
        }
        c -= bits % 4;
        if (c == 32) {
            c = 28;
        }
        if (c < 0) {
            /* Number of bits in top word is less than number needed. */
            c = -c;
            y = (byte)(n << c);
            n = e[i--];
            /* Digits are 32 bits wide: borrow the top c bits of the next
             * word. Shifting by (64 - c) would exceed the type width. */
            y |= (byte)(n >> (32 - c));
            n <<= c;
            c = 32 - c;
        }
        else if (c == 0) {
            /* All bits in top word used. */
            y = (byte)n;
        }
        else {
            y = (byte)(n >> c);
            n <<= 32 - c;
        }
        XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
        for (; i>=0 || c>=4; ) {
            if (c == 0) {
                /* Current word exhausted: start on the next one. */
                n = e[i--];
                y = (byte)(n >> 28);
                n <<= 4;
                c = 28;
            }
            else if (c < 4) {
                /* Window straddles two exponent words. */
                y = (byte)(n >> 28);
                n = e[i--];
                c = 4 - c;
                y |= (byte)(n >> (32 - c));
                n <<= c;
                c = 32 - c;
            }
            else {
                y = (byte)((n >> 28) & 0xf);
                n <<= 4;
                c -= 4;
            }

            /* r = r^16 * base^y. */
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);

            sp_2048_mont_mul_64(r, r, t[y], m, mp);
        }

        /* Convert out of Montgomery form. */
        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
        sp_2048_mont_reduce_64(r, m, mp);

        /* Final masked subtraction of m when r >= m. */
        mask = (sp_digit)0 - (sp_2048_cmp_64(r, m) >= 0);
        sp_2048_cond_sub_64(r, r, m, mask);
    }

    SP_FREE_VAR(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);

    return err;
}
#endif /* WOLFSSL_SP_SMALL */
#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */

#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
#ifdef WOLFSSL_HAVE_SP_RSA
/* RSA public key operation for a 2048-bit modulus.
 *
 * Computes in ^ em mod mm and writes the result as 256 big-endian bytes.
 * The exponents 0x10001 and 0x3 have dedicated paths; every other non-zero
 * exponent is processed bit by bit with square-and-multiply.
 *
 * in      Array of bytes representing the number to exponentiate, base.
 * inLen   Number of bytes in base. At most 256.
 * em      Public exponent. At most 32 bits long and not zero.
 * mm      Modulus. Exactly 2048 bits long and odd.
 * out     Buffer to hold big-endian bytes of exponentiation result.
 *         Must be at least 256 bytes long.
 * outLen  On entry the number of bytes available in out, on success set to
 *         256.
 * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
 * the exponent or base is too long or the modulus is not 2048 bits long,
 * MP_VAL when the modulus is even, MP_EXPTMOD_E when the exponent is zero and
 * MEMORY_E when dynamic memory allocation fails.
 */
int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em,
    const mp_int* mm, byte* out, word32* outLen)
{
    SP_DECL_VAR(sp_digit, a, 64 * 5);
    sp_digit* ah = NULL;
    sp_digit* r = NULL;
    sp_digit* m = NULL;
    sp_digit e[1] = {0};
    int err = MP_OKAY;

    /* Check all the sizes before any working memory is obtained. */
    if (*outLen < 256) {
        err = MP_TO_E;
    }
    if ((err == MP_OKAY) && ((mp_count_bits(em) > 32) || (inLen > 256) ||
                                          (mp_count_bits(mm) != 2048))) {
        err = MP_READ_E;
    }
    if ((err == MP_OKAY) && (mp_iseven(mm))) {
        err = MP_VAL;
    }

    SP_ALLOC_VAR(sp_digit, a, 64 * 5, NULL, DYNAMIC_TYPE_RSA);
    if (err == MP_OKAY) {
        /* Working buffer layout, in digits:
         *   a[0..63]     low half of the double-width value (a, ah)
         *   a[64..127]   ah - the base as read from in
         *   a[128..255]  r  - result, double width for products
         *   a[256..319]  m  - modulus
         */
        ah = &a[64];
        r = &a[64 * 2];
        m = &a[64 * 4];

        sp_2048_from_bin(ah, 64, in, inLen);

        /* Gather the exponent into one word. */
        e[0] = em->dp[0];
#if DIGIT_BIT < 32
        if (em->used > 1) {
            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
        }
#endif
        if (e[0] == 0) {
            err = MP_EXPTMOD_E;
        }
    }

    if (err == MP_OKAY) {
        sp_2048_from_mp(m, 64, mm);

        switch (e[0]) {
        case 0x10001: {
            sp_digit mp;
            int n;

            sp_2048_mont_setup(m, &mp);

            /* With the low half cleared, (a, ah) is the base times R.
             * Reducing it gives the Montgomery form: r = base.R mod m */
            XMEMSET(a, 0, sizeof(sp_digit) * 64);
            err = sp_2048_mod_64_cond(r, a, m);
            if (err != MP_OKAY) {
                break;
            }

            /* Sixteen squarings: r = base ^ 0x10000 in Montgomery form. */
            for (n = 0; n < 16; n++) {
                sp_2048_mont_sqr_64(r, r, m, mp);
            }
            /* Multiplying by the plain base supplies the last factor and
             * removes R: mont_red(r.R * base) = r.base mod m
             */
            sp_2048_mont_mul_64(r, r, ah, m, mp);

            /* Find the top digit that differs from the modulus and subtract
             * the modulus once when the result is not below it. */
            n = 63;
            while ((n > 0) && (r[n] == m[n])) {
                n--;
            }
            if (r[n] >= m[n]) {
                sp_2048_sub_in_place_64(r, m);
            }
            break;
        }

        case 0x3:
            /* r = base ^ 2 mod m */
            sp_2048_sqr_64(r, ah);
            err = sp_2048_mod_64_cond(r, r, m);
            if (err == MP_OKAY) {
                /* r = base ^ 3 mod m */
                sp_2048_mul_64(r, ah, r);
                err = sp_2048_mod_64_cond(r, r, m);
            }
            break;

        default: {
            sp_digit mp;
            int n;

            sp_2048_mont_setup(m, &mp);

            /* Convert the base to Montgomery form, in place in a. */
            XMEMSET(a, 0, sizeof(sp_digit) * 64);
            err = sp_2048_mod_64_cond(a, a, m);
            if (err != MP_OKAY) {
                break;
            }

            /* Locate the most significant set bit of the exponent. */
            n = 31;
            while ((n >= 0) && ((e[0] >> n) == 0)) {
                n--;
            }

            /* Top bit is covered by starting from the base; walk the rest. */
            XMEMCPY(r, a, sizeof(sp_digit) * 64);
            while (--n >= 0) {
                sp_2048_mont_sqr_64(r, r, m, mp);
                if (((e[0] >> n) & 1) == 1) {
                    sp_2048_mont_mul_64(r, r, a, m, mp);
                }
            }

            /* Clear the upper half and reduce to leave Montgomery form. */
            XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
            sp_2048_mont_reduce_64(r, m, mp);

            /* Subtract the modulus once when the result is not below it. */
            n = 63;
            while ((n > 0) && (r[n] == m[n])) {
                n--;
            }
            if (r[n] >= m[n]) {
                sp_2048_sub_in_place_64(r, m);
            }
            break;
        }
        }
    }

    if (err == MP_OKAY) {
        sp_2048_to_bin_64(r, out);
        *outLen = 256;
    }

    SP_FREE_VAR(a, NULL, DYNAMIC_TYPE_RSA);

    return err;
}

#ifndef WOLFSSL_RSA_PUBLIC_ONLY
/* Conditionally add a and b using the mask m.
 * m is -1 to add and 0 when not.
 *
 * Every word of b is ANDed with m before it is added to the matching word of
 * a, so a mask of 0 adds zero to every word.
 * The loop walks 0x80 bytes in steps of 4, that is 32 words.
 *
 * r  A single precision number representing conditional add result.
 * a  A single precision number to add with.
 * b  A single precision number to add.
 * m  Mask value to apply.
 * returns the carry out of the last word addition.
 */
SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b, sp_digit m)
{
    __asm__ __volatile__ (
        /* r4 = carry between words, r8 = 0x80 (loop limit in bytes),
         * r7 = byte offset of the current word. */
        "movs	r4, #0\n\t"
        "movs	r5, #0x80\n\t"
        "mov	r8, r5\n\t"
        "movs	r7, #0\n\t"
        "\n"
    "L_sp_2048_cond_add_32_words_%=:\n\t"
        /* r6 = b[offset] & m */
        "ldr	r6, [%[b], r7]\n\t"
#ifdef WOLFSSL_KEIL
        "ands	r6, r6, %[m]\n\t"
#elif defined(__clang__)
        "ands	r6, %[m]\n\t"
#else
        "and	r6, %[m]\n\t"
#endif
        /* Turn the saved carry (r4 is 0 or 1) back into the carry flag:
         * 0xffffffff + r4 carries out only when r4 is 1.
         * NOTE(review): the mnemonics without the 's' suffix are presumably
         * assembled as the flag-setting Thumb forms - confirm. */
        "movs	r5, #0\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r5, r5, #1\n\t"
#else
        "sub	r5, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r4\n\t"
#else
        "add	r5, r5, r4\n\t"
#endif
        /* r5 = a[offset] + r6 + carry */
        "ldr	r5, [%[a], r7]\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "adcs	r5, r6\n\t"
#else
        "adc	r5, r6\n\t"
#endif
        /* Save the new carry in r4: r4 = 0 + 0 + carry */
        "movs	r4, #0\n\t"
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r4\n\t"
#elif defined(__clang__)
        "adcs	r4, r4\n\t"
#else
        "adc	r4, r4\n\t"
#endif
        /* r[offset] = r5, then step to the next word. */
        "str	r5, [%[r], r7]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r7, r7, #4\n\t"
#else
        "add	r7, r7, #4\n\t"
#endif
        /* Loop while the offset is below 0x80. */
        "cmp	r7, r8\n\t"
        "blt	L_sp_2048_cond_add_32_words_%=\n\t"
        /* Hand the final carry back through the r operand. */
        "movs	%[r], r4\n\t"
        : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m)
        :
        : "memory", "r4", "r5", "r6", "r7", "r8", "cc"
    );
    /* r no longer holds a pointer here; it carries the value left in r4. */
    return (word32)(size_t)r;
}

/* RSA private key operation.
 *
 * Two implementations are selected at compile time:
 *  - SP_RSA_PRIVATE_EXP_D or RSA_LOW_MEM defined: a single exponentiation
 *    in ^ dm mod mm. pm, qm, dpm, dqm and qim are not used.
 *  - otherwise: CRT using pm, qm, dpm, dqm and qim. dm is not used and mm is
 *    only used for validation.
 *
 * in      Array of bytes representing the number to exponentiate, base.
 * inLen   Number of bytes in base.
 * dm      Private exponent.
 * pm      First prime.
 * qm      Second prime.
 * dpm     First prime's CRT exponent.
 * dqm     Second prime's CRT exponent.
 * qim     Inverse of second prime mod p.
 * mm      Modulus.
 * out     Buffer to hold big-endian bytes of exponentiation result.
 *         Must be at least 256 bytes long.
 * outLen  Number of bytes in result.
 * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
 * an array is too long or the modulus is not 2048 bits long, MP_VAL when the
 * modulus (or, in the CRT build, a prime) is even and MEMORY_E when dynamic
 * memory allocation fails.
 */
int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm,
    const mp_int* pm, const mp_int* qm, const mp_int* dpm, const mp_int* dqm,
    const mp_int* qim, const mp_int* mm, byte* out, word32* outLen)
{
#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
    SP_DECL_VAR(sp_digit, d, 64 * 4);
    sp_digit* a = NULL;
    sp_digit* m = NULL;
    sp_digit* r = NULL;
    int err = MP_OKAY;

    /* CRT parameters are not needed when exponentiating with d directly. */
    (void)pm;
    (void)qm;
    (void)dpm;
    (void)dqm;
    (void)qim;

    if (*outLen < 256U) {
        err = MP_TO_E;
    }
    if (err == MP_OKAY) {
        if (mp_count_bits(dm) > 2048) {
           err = MP_READ_E;
        }
        else if (inLen > 256) {
            err = MP_READ_E;
        }
        else if (mp_count_bits(mm) != 2048) {
            err = MP_READ_E;
        }
        else if (mp_iseven(mm)) {
            err = MP_VAL;
        }
    }

    SP_ALLOC_VAR(sp_digit, d, 64 * 4, NULL, DYNAMIC_TYPE_RSA);
    if (err == MP_OKAY) {
        /* Working buffer layout, in digits:
         *   d[0..63]     private exponent
         *   d[64..191]   a - base on entry, result on exit (r aliases a)
         *   d[192..255]  m - modulus
         */
        a = d + 64;
        m = a + 128;
        r = a;

        sp_2048_from_bin(a, 64, in, inLen);
        sp_2048_from_mp(d, 64, dm);
        sp_2048_from_mp(m, 64, mm);
        /* Exponent is always processed as a full 2048 bits. */
        err = sp_2048_mod_exp_64(r, a, d, 2048, m, 0);
    }

    if (err == MP_OKAY) {
        sp_2048_to_bin_64(r, out);
        *outLen = 256;
    }

    /* Presumably zeroizes the private data before release - the macro is
     * defined outside this section. */
    SP_ZEROFREE_VAR_ALT(sp_digit, d, a, 64, NULL, DYNAMIC_TYPE_RSA);

    return err;
#else
    SP_DECL_VAR(sp_digit, a, 32 * 11);
    sp_digit* p = NULL;
    sp_digit* q = NULL;
    sp_digit* dp = NULL;
    sp_digit* tmpa = NULL;
    sp_digit* tmpb = NULL;
    sp_digit* r = NULL;
    sp_digit* qi = NULL;
    sp_digit* dq = NULL;
    sp_digit c;
    int err = MP_OKAY;

    (void)dm;
    (void)mm;

    if (*outLen < 256) {
        err = MP_TO_E;
    }
    else if (inLen > 256 || mp_count_bits(mm) != 2048) {
        err = MP_READ_E;
    }
    else if (mp_iseven(mm)) {
        err = MP_VAL;
    }
    else if (mp_iseven(pm)) {
        err = MP_VAL;
    }
    else if (mp_iseven(qm)) {
        err = MP_VAL;
    }

    SP_ALLOC_VAR(sp_digit, a, 32 * 11, NULL, DYNAMIC_TYPE_RSA);
    if (err == MP_OKAY) {
        /* Working buffer layout, in digits (352 in total):
         *   a[0..127]    base on entry, result on exit (r aliases a)
         *   a[128..159]  p
         *   a[160..191]  q
         *   a[192..223]  dp, dq and qi - one slot reused in turn, so each
         *                value must be loaded only once the previous one is
         *                no longer needed
         *   a[224..287]  tmpa
         *   a[288..351]  tmpb
         */
        p = a + 64 * 2;
        q = p + 32;
        qi = dq = dp = q + 32;
        tmpa = qi + 32;
        tmpb = tmpa + 64;
        r = a;

        sp_2048_from_bin(a, 64, in, inLen);
        sp_2048_from_mp(p, 32, pm);
        sp_2048_from_mp(q, 32, qm);
        sp_2048_from_mp(dp, 32, dpm);

        /* tmpa = in ^ dp mod p */
        err = sp_2048_mod_exp_32(tmpa, a, dp, 1024, p, 1);
    }
    if (err == MP_OKAY) {
        /* dq overwrites dp in the shared slot. */
        sp_2048_from_mp(dq, 32, dqm);
        /* tmpb = in ^ dq mod q */
        err = sp_2048_mod_exp_32(tmpb, a, dq, 1024, q, 1);
    }

    if (err == MP_OKAY) {
        /* tmpa = tmpa - tmpb, with c used as the mask for adding p back.
         * The carry of the first add is folded into c before the second add.
         * NOTE(review): presumably sub_in_place returns 0 or -1 for the
         * borrow - confirm against its definition. */
        c = sp_2048_sub_in_place_32(tmpa, tmpb);
        c += sp_2048_cond_add_32(tmpa, tmpa, p, c);
        sp_2048_cond_add_32(tmpa, tmpa, p, c);

        /* qi overwrites dq in the shared slot. */
        sp_2048_from_mp(qi, 32, qim);
        /* tmpa = (tmpa * qi) mod p */
        sp_2048_mul_32(tmpa, tmpa, qi);
        err = sp_2048_mod_32(tmpa, tmpa, p);
    }

    if (err == MP_OKAY) {
        /* r = tmpb + q * tmpa, with tmpb zero-extended to 64 digits. */
        sp_2048_mul_32(tmpa, q, tmpa);
        XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32);
        sp_2048_add_64(r, tmpb, tmpa);

        sp_2048_to_bin_64(r, out);
        *outLen = 256;
    }

    /* Presumably zeroizes the private data before release - the macro is
     * defined outside this section. */
    SP_ZEROFREE_VAR(sp_digit, a, 32 * 11, NULL, DYNAMIC_TYPE_RSA);
#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
    return err;
}
#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
#endif /* WOLFSSL_HAVE_SP_RSA */
#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
/* Convert an array of sp_digit to an mp_int.
 *
 * The 64 words of a are repacked into digits of DIGIT_BIT bits. One of three
 * variants is compiled depending on how DIGIT_BIT compares to 32.
 *
 * a  A single precision integer.
 * r  A multi-precision integer.
 * returns MP_OKAY on success, otherwise the error returned by mp_grow.
 */
static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
{
    int err;

    /* Make room for enough digits to hold 2048 bits. */
    err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
#if DIGIT_BIT == 32
        /* Digits are the same size: copy straight across. */
        XMEMCPY(r->dp, a, sizeof(sp_digit) * 64);
        r->used = 64;
        mp_clamp(r);
#elif DIGIT_BIT < 32
        /* Digits are smaller than a word: each word is spread over several
         * digits.
         * i indexes the words of a, j the digits of r and s is a bit offset.
         */
        int i;
        int j = 0;
        int s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 64; i++) {
            /* Fill the rest of the current digit from the bottom of a[i]. */
            r->dp[j] |= (mp_digit)(a[i] << s);
            r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
            /* s is now the number of bits of a[i] that have been used. */
            s = DIGIT_BIT - s;
            r->dp[++j] = (mp_digit)(a[i] >> s);
            /* Emit further digits while a whole one still fits in a[i]. */
            while (s + DIGIT_BIT <= 32) {
                s += DIGIT_BIT;
                r->dp[j++] &= ((sp_digit)1 << DIGIT_BIT) - 1;
                /* Avoid shifting by the full word size. */
                if (s == SP_WORD_SIZE) {
                    r->dp[j] = 0;
                }
                else {
                    r->dp[j] = (mp_digit)(a[i] >> s);
                }
            }
            /* Bits of a[i] already placed in the current digit. */
            s = 32 - s;
        }
        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#else
        /* Digits are larger than a word: several words go into each digit.
         * i indexes the words of a, j the digits of r and s is the bit offset
         * within the current digit.
         */
        int i;
        int j = 0;
        int s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 64; i++) {
            r->dp[j] |= ((mp_digit)a[i]) << s;
            if (s + 32 >= DIGIT_BIT) {
                /* This word completes the current digit. */
    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
                /* NOTE(review): DIGIT_BIT is above 32 in this branch while
                 * the 1 being shifted has type sp_digit - confirm sp_digit is
                 * wide enough here, or that this branch is never built with a
                 * 32-bit sp_digit. */
                r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
    #endif
                /* Number of bits of a[i] that fitted in the finished digit. */
                s = DIGIT_BIT - s;
                /* NOTE(review): s can be 32 here (e.g. DIGIT_BIT == 64), the
                 * same width this function assumes for the words of a -
                 * confirm the shift is well defined for that case. */
                r->dp[++j] = a[i] >> s;
                /* Bits of a[i] carried into the new digit. */
                s = 32 - s;
            }
            else {
                s += 32;
            }
        }
        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#endif
    }

    return err;
}

/* Modular exponentiation for Diffie-Hellman with a 2048-bit modulus.
 *
 * Calculates res = base ^ exp mod mod.
 *
 * base  Base. MP integer of at most 2048 bits.
 * exp   Exponent. MP integer of at most 2048 bits.
 * mod   Modulus. MP integer of exactly 2048 bits that is odd.
 * res   Result. MP integer.
 * returns 0 on success, MP_READ_E if the base or exponent has too many bits
 * or the modulus is not 2048 bits long, MP_VAL if the modulus is even and
 * MEMORY_E if memory allocation fails.
 */
int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod,
    mp_int* res)
{
    /* Base is double width as it also receives the result. */
    sp_digit baseD[128];
    sp_digit expD[64];
    sp_digit modD[64];
    int expBits = mp_count_bits(exp);
    int err = MP_OKAY;

    if ((mp_count_bits(base) > 2048) || (expBits > 2048) ||
                                          (mp_count_bits(mod) != 2048)) {
        err = MP_READ_E;
    }
    else if (mp_iseven(mod)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        sp_2048_from_mp(baseD, 64, base);
        sp_2048_from_mp(expD, 64, exp);
        sp_2048_from_mp(modD, 64, mod);

        /* Result is written over the base. */
        err = sp_2048_mod_exp_64(baseD, baseD, expD, expBits, modD, 0);
    }

    if (err == MP_OKAY) {
        err = sp_2048_to_mp(baseD, res);
    }

    /* Clear the copy of the exponent on every path. */
    XMEMSET(expD, 0, sizeof(expD));

    return err;
}

#ifdef WOLFSSL_HAVE_SP_DH

#ifdef HAVE_FFDHE_2048
/* Left shift a by n bits into r. (r = a << n)
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * n  Integer representing number of bits to shift.
 */
static void sp_2048_lshift_64(sp_digit* r, const sp_digit* a, byte n)
{
    __asm__ __volatile__ (
        "movs	r7, #31\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	r7, r7, %[n]\n\t"
#else
        "sub	r7, r7, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[a], %[a], #0xc0\n\t"
#else
        "add	%[a], %[a], #0xc0\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[r], %[r], #0xc0\n\t"
#else
        "add	%[r], %[r], #0xc0\n\t"
#endif
        "ldr	r4, [%[a], #60]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, r4, #1\n\t"
#else
        "lsr	r5, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r5, r5, r7\n\t"
#else
        "lsr	r5, r5, r7\n\t"
#endif
        "ldr	r3, [%[a], #56]\n\t"
        "str	r5, [%[r], #64]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #52]\n\t"
        "str	r4, [%[r], #60]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #48]\n\t"
        "str	r3, [%[r], #56]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #44]\n\t"
        "str	r5, [%[r], #52]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #40]\n\t"
        "str	r4, [%[r], #48]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #36]\n\t"
        "str	r3, [%[r], #44]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #32]\n\t"
        "str	r5, [%[r], #40]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #28]\n\t"
        "str	r4, [%[r], #36]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #24]\n\t"
        "str	r3, [%[r], #32]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #20]\n\t"
        "str	r5, [%[r], #28]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #16]\n\t"
        "str	r4, [%[r], #24]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #12]\n\t"
        "str	r3, [%[r], #20]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #8]\n\t"
        "str	r5, [%[r], #16]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #4]\n\t"
        "str	r4, [%[r], #12]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a]]\n\t"
        "str	r3, [%[r], #8]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[a], %[a], #0x40\n\t"
#else
        "sub	%[a], %[a], #0x40\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[r], %[r], #0x40\n\t"
#else
        "sub	%[r], %[r], #0x40\n\t"
#endif
        "ldr	r3, [%[a], #60]\n\t"
        "str	r5, [%[r], #68]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #56]\n\t"
        "str	r4, [%[r], #64]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #52]\n\t"
        "str	r3, [%[r], #60]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #48]\n\t"
        "str	r5, [%[r], #56]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #44]\n\t"
        "str	r4, [%[r], #52]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #40]\n\t"
        "str	r3, [%[r], #48]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #36]\n\t"
        "str	r5, [%[r], #44]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #32]\n\t"
        "str	r4, [%[r], #40]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #28]\n\t"
        "str	r3, [%[r], #36]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #24]\n\t"
        "str	r5, [%[r], #32]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #20]\n\t"
        "str	r4, [%[r], #28]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #16]\n\t"
        "str	r3, [%[r], #24]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #12]\n\t"
        "str	r5, [%[r], #20]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #8]\n\t"
        "str	r4, [%[r], #16]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #4]\n\t"
        "str	r3, [%[r], #12]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a]]\n\t"
        "str	r5, [%[r], #8]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[a], %[a], #0x40\n\t"
#else
        "sub	%[a], %[a], #0x40\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[r], %[r], #0x40\n\t"
#else
        "sub	%[r], %[r], #0x40\n\t"
#endif
        "ldr	r5, [%[a], #60]\n\t"
        "str	r4, [%[r], #68]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #56]\n\t"
        "str	r3, [%[r], #64]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #52]\n\t"
        "str	r5, [%[r], #60]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #48]\n\t"
        "str	r4, [%[r], #56]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #44]\n\t"
        "str	r3, [%[r], #52]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #40]\n\t"
        "str	r5, [%[r], #48]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #36]\n\t"
        "str	r4, [%[r], #44]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #32]\n\t"
        "str	r3, [%[r], #40]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #28]\n\t"
        "str	r5, [%[r], #36]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #24]\n\t"
        "str	r4, [%[r], #32]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #20]\n\t"
        "str	r3, [%[r], #28]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #16]\n\t"
        "str	r5, [%[r], #24]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #12]\n\t"
        "str	r4, [%[r], #20]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #8]\n\t"
        "str	r3, [%[r], #16]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #4]\n\t"
        "str	r5, [%[r], #12]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a]]\n\t"
        "str	r4, [%[r], #8]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[a], %[a], #0x40\n\t"
#else
        "sub	%[a], %[a], #0x40\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[r], %[r], #0x40\n\t"
#else
        "sub	%[r], %[r], #0x40\n\t"
#endif
        "ldr	r4, [%[a], #60]\n\t"
        "str	r3, [%[r], #68]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #56]\n\t"
        "str	r5, [%[r], #64]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #52]\n\t"
        "str	r4, [%[r], #60]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #48]\n\t"
        "str	r3, [%[r], #56]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #44]\n\t"
        "str	r5, [%[r], #52]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #40]\n\t"
        "str	r4, [%[r], #48]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #36]\n\t"
        "str	r3, [%[r], #44]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #32]\n\t"
        "str	r5, [%[r], #40]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #28]\n\t"
        "str	r4, [%[r], #36]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #24]\n\t"
        "str	r3, [%[r], #32]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #20]\n\t"
        "str	r5, [%[r], #28]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #16]\n\t"
        "str	r4, [%[r], #24]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a], #12]\n\t"
        "str	r3, [%[r], #20]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "ldr	r3, [%[a], #8]\n\t"
        "str	r5, [%[r], #16]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r3, #1\n\t"
#else
        "lsr	r6, r3, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r3, r3, %[n]\n\t"
#else
        "lsl	r3, r3, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r4, r4, r6\n\t"
#elif defined(__clang__)
        "orrs	r4, r6\n\t"
#else
        "orr	r4, r6\n\t"
#endif
        "ldr	r5, [%[a], #4]\n\t"
        "str	r4, [%[r], #12]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r5, #1\n\t"
#else
        "lsr	r6, r5, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r5, r5, %[n]\n\t"
#else
        "lsl	r5, r5, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r3, r3, r6\n\t"
#elif defined(__clang__)
        "orrs	r3, r6\n\t"
#else
        "orr	r3, r6\n\t"
#endif
        "ldr	r4, [%[a]]\n\t"
        "str	r3, [%[r], #8]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r4, #1\n\t"
#else
        "lsr	r6, r4, #1\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r4, r4, %[n]\n\t"
#else
        "lsl	r4, r4, %[n]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, r7\n\t"
#else
        "lsr	r6, r6, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "orrs	r5, r5, r6\n\t"
#elif defined(__clang__)
        "orrs	r5, r6\n\t"
#else
        "orr	r5, r6\n\t"
#endif
        "str	r4, [%[r]]\n\t"
        "str	r5, [%[r], #4]\n\t"
        : [r] "+l" (r), [a] "+l" (a), [n] "+l" (n)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "cc"
    );
}

/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
 *
 * The exponent is consumed from the most significant end in 5-bit windows
 * (the first window holds bits % 5 bits when that is non-zero). For every
 * window the accumulator is squared five times and then shifted left by the
 * window value.
 *
 * r     A single precision number that is the result of the operation.
 *       Must have room for 128 digits: r[64] and above are used as scratch.
 * e     A single precision number that is the exponent.
 * bits  The number of bits in the exponent.
 * m     A single precision number that is the modulus.
 * returns  0 on success.
 * returns  MEMORY_E on dynamic memory allocation failure.
 * returns  MP_VAL when bits is zero.
 */
static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits,
        const sp_digit* m)
{
    SP_DECL_VAR(sp_digit, td, 193);
    sp_digit* norm = NULL;
    sp_digit* tmp = NULL;
    sp_digit mp = 1;
    sp_digit n;
    sp_digit o;
    sp_digit mask;
    int i;
    int c;
    byte y;
    int err = MP_OKAY;

    if (bits == 0) {
        err = MP_VAL;
    }

    SP_ALLOC_VAR(sp_digit, td, 193, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    if (err == MP_OKAY) {
        /* td layout: norm occupies [0, 128), tmp occupies [128, 193). */
        norm = td;
        tmp = td + 128;

        sp_2048_mont_setup(m, &mp);
        sp_2048_mont_norm_64(norm, m);

        /* n holds the exponent word being consumed (top bits first);
         * c is the number of unconsumed bits left in n. */
        i = (bits - 1) / 32;
        n = e[i--];
        c = bits & 31;
        if (c == 0) {
            c = 32;
        }
        /* The first window takes bits % 5 bits so that the rest of the
         * exponent splits into whole 5-bit windows. */
        c -= bits % 5;
        if (c == 32) {
            /* Full top word and bits % 5 == 0: take a whole 5-bit window. */
            c = 27;
        }
        if (c < 0) {
            /* Number of bits in top word is less than number needed. */
            c = -c;
            y = (byte)(n << c);
            n = e[i--];
            /* Digits are 32 bits wide: the missing c bits come from the top
             * of the next word. A shift count of (64 - c) would exceed the
             * digit width, which is undefined behaviour. */
            y |= (byte)(n >> (32 - c));
            n <<= c;
            c = 32 - c;
        }
        else if (c == 0) {
            /* All bits in top word used. */
            y = (byte)n;
        }
        else {
            y = (byte)(n >> c);
            n <<= 32 - c;
        }
        /* Start from norm shifted left by the first window value. */
        sp_2048_lshift_64(r, norm, y);
        for (; i>=0 || c>=5; ) {
            if (c == 0) {
                /* Current word exhausted: load the next one. */
                n = e[i--];
                y = (byte)(n >> 27);
                n <<= 5;
                c = 27;
            }
            else if (c < 5) {
                /* Window straddles two words: top c bits of n plus the top
                 * (5 - c) bits of the next word. */
                y = (byte)(n >> 27);
                n = e[i--];
                c = 5 - c;
                y |= (byte)(n >> (32 - c));
                n <<= c;
                c = 32 - c;
            }
            else {
                y = (byte)((n >> 27) & 0x1f);
                n <<= 5;
                c -= 5;
            }

            /* Five squarings make room for the next 5-bit window. */
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);

            /* Shift left by the window value, then fold the overflow word
             * r[64] back in by adding norm * r[64] and subtracting m when
             * that addition carries out.
             * NOTE(review): presumably relies on norm being 2^2048 mod m -
             * confirm against sp_2048_mont_norm_64. */
            sp_2048_lshift_64(r, r, y);
            sp_2048_mul_d_64(tmp, norm, r[64]);
            r[64] = 0;
            o = sp_2048_add_64(r, r, tmp);
            sp_2048_cond_sub_64(r, r, m, (sp_digit)0 - o);
        }

        /* Clear the upper half before the final Montgomery reduction. */
        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
        sp_2048_mont_reduce_64(r, m, mp);

        /* Subtract m once more when r >= m, selected by mask. */
        mask = (sp_digit)0 - (sp_2048_cmp_64(r, m) >= 0);
        sp_2048_cond_sub_64(r, r, m, mask);
    }

    SP_FREE_VAR(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);

    return err;
}
#endif /* HAVE_FFDHE_2048 */

/* Diffie-Hellman modular exponentiation with a 2048-bit modulus.
 *
 * The result is written big-endian to out with leading zero bytes removed.
 *
 * base     Base. At most 2048 bits.
 * exp      Array of bytes that is the exponent.
 * expLen   Length of data, in bytes, in exponent. At most 256.
 * mod      Modulus. Must be exactly 2048 bits long and odd.
 * out      Buffer to hold big-endian bytes of exponentiation result.
 *          Must be at least 256 bytes long.
 * outLen   Set to the length, in bytes, of the exponentiation result.
 * returns 0 on success, MP_READ_E when base, exponent or modulus has an
 * unsupported length, MP_VAL when the modulus is even, or the error returned
 * by the exponentiation (e.g. MEMORY_E if memory allocation fails).
 */
int sp_DhExp_2048(const mp_int* base, const byte* exp, word32 expLen,
    const mp_int* mod, byte* out, word32* outLen)
{
    sp_digit b[128];
    sp_digit e[64];
    sp_digit m[64];
    sp_digit* r = b;
    word32 lead = 0;
    int err = MP_OKAY;

    /* Reject operands this fixed-size implementation cannot handle. */
    if ((mp_count_bits(base) > 2048) || (expLen > 256) ||
            (mp_count_bits(mod) != 2048)) {
        err = MP_READ_E;
    }
    else if (mp_iseven(mod)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        sp_2048_from_mp(b, 64, base);
        sp_2048_from_bin(e, 64, exp, expLen);
        sp_2048_from_mp(m, 64, mod);

    #ifdef HAVE_FFDHE_2048
        /* Base of 2 and an all-ones top modulus word: use the dedicated
         * base-2 routine. */
        if ((base->used == 1) && (base->dp[0] == 2) &&
                (m[63] == (sp_digit)-1)) {
            err = sp_2048_mod_exp_2_64(r, e, expLen * 8, m);
        }
        else
    #endif
        {
            err = sp_2048_mod_exp_64(r, b, e, expLen * 8, m, 0);
        }
    }

    if (err == MP_OKAY) {
        sp_2048_to_bin_64(r, out);
        *outLen = 256;
        /* Count leading zero bytes, then slide the value to the front. */
        while ((lead < 256) && (out[lead] == 0)) {
            lead++;
        }
        *outLen -= lead;
        XMEMMOVE(out, out + lead, *outLen);
    }

    /* Wipe the local copy of the exponent. */
    XMEMSET(e, 0, sizeof(e));

    return err;
}
#endif /* WOLFSSL_HAVE_SP_DH */

/* Modular exponentiation with a 1024-bit modulus. (res = base ^ exp mod mod)
 *
 * base  Base. MP integer. At most 1024 bits.
 * exp   Exponent. MP integer. At most 1024 bits.
 * mod   Modulus. MP integer. Must be exactly 1024 bits long and odd.
 * res   Result. MP integer.
 * returns 0 on success, MP_READ_E when an operand has an unsupported length,
 * MP_VAL when the modulus is even, or the error returned by the
 * exponentiation or the conversion to an MP integer (e.g. MEMORY_E if memory
 * allocation fails).
 */
int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod,
    mp_int* res)
{
    sp_digit b[64];
    sp_digit e[32];
    sp_digit m[32];
    sp_digit* r = b;
    int expBits = mp_count_bits(exp);
    int err = MP_OKAY;

    /* Reject operands this fixed-size implementation cannot handle. */
    if ((mp_count_bits(base) > 1024) || (expBits > 1024) ||
            (mp_count_bits(mod) != 1024)) {
        err = MP_READ_E;
    }
    else if (mp_iseven(mod)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        sp_2048_from_mp(b, 32, base);
        sp_2048_from_mp(e, 32, exp);
        sp_2048_from_mp(m, 32, mod);

        err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0);
    }

    if (err == MP_OKAY) {
        /* Only the low 32 words hold the result; zero the upper 32 before
         * handing all of r to the conversion routine. */
        XMEMSET(r + 32, 0, sizeof(*r) * 32U);
        err = sp_2048_to_mp(r, res);
        res->used = mod->used;
        mp_clamp(res);
    }

    /* Wipe the local copy of the exponent. */
    XMEMSET(e, 0, sizeof(e));

    return err;
}

#endif /* WOLFSSL_HAVE_SP_DH | (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) */

#endif /* !WOLFSSL_SP_NO_2048 */

#ifndef WOLFSSL_SP_NO_3072
/* Read big endian unsigned byte array into r.
 *
 * Bytes are packed four to a word starting from the end of the array, so
 * r[0] receives the least significant 32 bits. Words of r not covered by the
 * input are set to zero.
 *
 * r  A single precision integer.
 * size  Number of words in r.
 * a  Byte array.
 * n  Number of bytes in array to read.
 */
static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
{
    int idx = n - 1;    /* Index of the lowest byte not yet consumed. */
    int w = 0;          /* Next word of r to write. */
    int k;
    byte* d;

    /* Whole groups of four bytes. */
    while (idx >= 3) {
        r[w++] = ((sp_digit)a[idx - 3] << 24) |
                 ((sp_digit)a[idx - 2] << 16) |
                 ((sp_digit)a[idx - 1] <<  8) |
                 ((sp_digit)a[idx - 0] <<  0);
        idx -= 4;
    }

    /* One to three leading bytes remain: place them in a zeroed word. */
    if (idx >= 0) {
        r[w] = 0;

        d = (byte*)(r + w);
#ifdef BIG_ENDIAN_ORDER
        /* Right-align the bytes within the word, keeping their order. */
        for (k = 0; k <= idx; k++) {
            d[3 - idx + k] = a[k];
        }
#else
        /* Last remaining byte goes to the lowest address of the word. */
        for (k = idx; k >= 0; k--) {
            d[idx - k] = a[k];
        }
#endif
        w++;
    }

    /* Zero the unused high words. */
    while (w < size) {
        r[w++] = 0;
    }
}

/* Convert an mp_int to an array of sp_digit.
 *
 * Repacks the DIGIT_BIT-wide digits of a into 32-bit words of r, in the same
 * order as a->dp. Words of r beyond the converted value are set to zero.
 *
 * r  A single precision integer.
 * size  Number of sp_digit words in r to fill.
 * a  A multi-precision integer.
 */
static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
{
#if DIGIT_BIT == 32
    /* Digits map one-to-one. The loop always runs size iterations and selects
     * with a mask instead of branching on a->used. */
    int i;
    /* j counts up from -(a->used); its top bit is set while i < a->used.
     * (assumes sp_digit is an unsigned 32-bit type - TODO confirm) */
    sp_digit j = (sp_digit)0 - (sp_digit)a->used;
    int o = 0;

    for (i = 0; i < size; i++) {
        /* All ones while the top bit of j is set, zero afterwards. */
        sp_digit mask = (sp_digit)0 - (j >> 31);
        r[i] = a->dp[o] & mask;
        j++;
        /* Advance the source index only while the top bit of j is set. */
        o += (int)(j >> 31);
    }
#elif DIGIT_BIT > 32
    /* Each mp digit is wider than a word and is split across several words. */
    unsigned int i;
    int j = 0;
    /* Shift amount carried from one mp digit to the next. */
    word32 s = 0;

    r[0] = 0;
    for (i = 0; i < (unsigned int)a->used && j < size; i++) {
        /* Merge the low bits of this digit into the partially filled word. */
        r[j] |= ((sp_digit)a->dp[i] << s);
        r[j] &= 0xffffffff;
        s = 32U - s;
        if (j + 1 >= size) {
            break;
        }
        /* lint allow cast of mismatch word32 and mp_digit */
        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
        /* Emit further words while the digit still has 32 or more bits. */
        while ((s + 32U) <= (word32)DIGIT_BIT) {
            s += 32U;
            r[j] &= 0xffffffff;
            if (j + 1 >= size) {
                break;
            }
            if (s < (word32)DIGIT_BIT) {
                /* lint allow cast of mismatch word32 and mp_digit */
                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
            }
            else {
                r[++j] = (sp_digit)0;
            }
        }
        s = (word32)DIGIT_BIT - s;
    }

    /* Zero the words above the last one written. */
    for (j++; j < size; j++) {
        r[j] = 0;
    }
#else
    /* Each mp digit is narrower than a word; several are gathered per word. */
    unsigned int i;
    int j = 0;
    /* Bit position within r[j] at which the next digit is placed. */
    int s = 0;

    r[0] = 0;
    for (i = 0; i < (unsigned int)a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i]) << s;
        if (s + DIGIT_BIT >= 32) {
            /* Word is full: mask it and carry the leftover bits, if any,
             * into the next word. */
            r[j] &= 0xffffffff;
            if (j + 1 >= size) {
                break;
            }
            s = 32 - s;
            if (s == DIGIT_BIT) {
                r[++j] = 0;
                s = 0;
            }
            else {
                r[++j] = a->dp[i] >> s;
                s = DIGIT_BIT - s;
            }
        }
        else {
            s += DIGIT_BIT;
        }
    }

    /* Zero the words above the last one written. */
    for (j++; j < size; j++) {
        r[j] = 0;
    }
#endif
}

/* Write r as big endian to byte array.
 * Fixed length number of bytes written: 384
 *
 * Word 95 of r is written first and word 0 last; within each word the most
 * significant byte comes first.
 *
 * r  A single precision integer of 96 words.
 * a  Byte array. Must have room for 384 bytes.
 */
static void sp_3072_to_bin_96(sp_digit* r, byte* a)
{
    int word;

    for (word = 0; word < 96; word++) {
        const int src = 95 - word;  /* Source word, highest first. */
        const int dst = word * 4;   /* Offset of its first output byte. */

        a[dst + 0] = (byte)(r[src] >> 24);
        a[dst + 1] = (byte)(r[src] >> 16);
        a[dst + 2] = (byte)(r[src] >> 8);
        a[dst + 3] = (byte)(r[src] >> 0);
    }
}

#if (defined(WOLFSSL_HAVE_SP_RSA) && (!defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(WOLFSSL_SP_SMALL))) || defined(WOLFSSL_HAVE_SP_DH)
/* Normalize the values in each word to 32 bits.
 *
 * Expands to nothing in this implementation, so the argument is not
 * evaluated.
 *
 * a  Array of sp_digit to normalize.
 */
#define sp_3072_norm_96(a)

#endif /* (WOLFSSL_HAVE_SP_RSA && (!WOLFSSL_RSA_PUBLIC_ONLY || !WOLFSSL_SP_SMALL)) || WOLFSSL_HAVE_SP_DH */
/* Normalize the values in each word to 32 bits.
 *
 * Expands to nothing in this implementation, so the argument is not
 * evaluated. This definition is unconditional and identical to the guarded
 * one above, which makes the guarded copy redundant.
 *
 * a  Array of sp_digit to normalize.
 */
#define sp_3072_norm_96(a)

#ifndef WOLFSSL_SP_SMALL
#ifndef WOLFSSL_SP_LARGE_CODE
/* Multiply a and b into r. (r = a * b)
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit t[12 * 2];
    sp_digit* tmp = t;
    __asm__ __volatile__ (
        "movs	r3, #0\n\t"
        "movs	r4, #0\n\t"
        "mov	r8, r3\n\t"
        "mov	r11, %[tmp]\n\t"
        "mov	r9, %[a]\n\t"
        "mov	r10, %[b]\n\t"
        "movs	r6, #48\n\t"
        "add	r6, r6, r9\n\t"
        "mov	r12, r6\n\t"
        "\n"
    "L_sp_3072_mul_12_words_%=:\n\t"
        "movs	%[tmp], #0\n\t"
        "movs	r5, #0\n\t"
        "movs	r6, #44\n\t"
        "mov	%[a], r8\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[a], %[a], r6\n\t"
#else
        "sub	%[a], %[a], r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "sbcs	r6, r6, r6\n\t"
#elif defined(__clang__)
        "sbcs	r6, r6\n\t"
#else
        "sbc	r6, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "mvns	r6, r6\n\t"
#else
        "mvn	r6, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "ands	%[a], %[a], r6\n\t"
#elif defined(__clang__)
        "ands	%[a], r6\n\t"
#else
        "and	%[a], r6\n\t"
#endif
        "mov	%[b], r8\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[b], %[b], %[a]\n\t"
#else
        "sub	%[b], %[b], %[a]\n\t"
#endif
        "add	%[a], %[a], r9\n\t"
        "add	%[b], %[b], r10\n\t"
        "\n"
    "L_sp_3072_mul_12_mul_%=:\n\t"
        "# Multiply Start\n\t"
        "ldrh	r6, [%[a]]\n\t"
        "ldrh	r7, [%[b]]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[tmp]\n\t"
#else
        "adc	r4, %[tmp]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[tmp]\n\t"
#else
        "adc	r5, %[tmp]\n\t"
#endif
        "ldr	r7, [%[b]]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r7, #16\n\t"
#else
        "lsr	r7, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[tmp]\n\t"
#else
        "adc	r5, %[tmp]\n\t"
#endif
        "ldr	r6, [%[a]]\n\t"
        "ldr	r7, [%[b]]\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, r6, #16\n\t"
#else
        "lsr	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r7, #16\n\t"
#else
        "lsr	r7, r7, #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[tmp]\n\t"
#else
        "adc	r5, %[tmp]\n\t"
#endif
        "ldrh	r7, [%[b]]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[tmp]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[tmp]\n\t"
#else
        "adc	r5, %[tmp]\n\t"
#endif
        "# Multiply Done\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	%[a], %[a], #4\n\t"
#else
        "add	%[a], %[a], #4\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "subs	%[b], %[b], #4\n\t"
#else
        "sub	%[b], %[b], #4\n\t"
#endif
        "cmp	%[a], r12\n\t"
        "beq	L_sp_3072_mul_12_done_mul_%=\n\t"
        "mov	r6, r8\n\t"
        "add	r6, r6, r9\n\t"
        "cmp	%[a], r6\n\t"
        "ble	L_sp_3072_mul_12_mul_%=\n\t"
        "\n"
    "L_sp_3072_mul_12_done_mul_%=:\n\t"
        "mov	%[tmp], r11\n\t"
        "mov	r7, r8\n\t"
        "str	r3, [%[tmp], r7]\n\t"
        "movs	r3, r4\n\t"
        "movs	r4, r5\n\t"
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r7, r7, #4\n\t"
#else
        "add	r7, r7, #4\n\t"
#endif
        "mov	r8, r7\n\t"
        "movs	r6, #0x58\n\t"
        "cmp	r7, r6\n\t"
        "ble	L_sp_3072_mul_12_words_%=\n\t"
        "str	r3, [%[tmp], r7]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        : [a] "+l" (a), [b] "+l" (b), [tmp] "+l" (tmp)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc"
    );

    XMEMCPY(r, t, sizeof(t));
}

#else
/* Multiply a and b into r. (r = a * b)
 *
 * r  A single precision integer that receives the product.
 * a  A single precision integer; the first operand.
 * b  A single precision integer; the second operand.
 */
SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    __asm__ __volatile__ (
        "sub	sp, sp, #48\n\t"
        "mov	r8, %[r]\n\t"
        "mov	r9, %[a]\n\t"
        "mov	r10, %[b]\n\t"
        "movs	%[r], #0\n\t"
        "#  A[0] * B[0]\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r3, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r3, r6, r3\n\t"
#elif defined(__clang__)
        "muls	r3, r6\n\t"
#else
        "mul	r3, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r4, r6, #16\n\t"
#else
        "lsr	r4, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
        "movs	r5, #0\n\t"
        "str	r3, [sp]\n\t"
        "#  A[0] * B[1]\n\t"
        "movs	r3, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[1] * B[0]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "str	r4, [sp, #4]\n\t"
        "#  A[2] * B[0]\n\t"
        "movs	r4, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[1] * B[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[0] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "str	r5, [sp, #8]\n\t"
        "#  A[0] * B[3]\n\t"
        "movs	r5, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[1] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[2] * B[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[3] * B[0]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "str	r3, [sp, #12]\n\t"
        "#  A[4] * B[0]\n\t"
        "movs	r3, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[3] * B[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[2] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[1] * B[3]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[0] * B[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #16]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "str	r4, [sp, #16]\n\t"
        "#  A[0] * B[5]\n\t"
        "movs	r4, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #20]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[1] * B[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #16]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[2] * B[3]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[3] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[4] * B[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[5] * B[0]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "str	r5, [sp, #20]\n\t"
        "#  A[6] * B[0]\n\t"
        "movs	r5, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[5] * B[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[4] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[3] * B[3]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[2] * B[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #16]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[1] * B[5]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #20]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[0] * B[6]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #24]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "str	r3, [sp, #24]\n\t"
        "#  A[0] * B[7]\n\t"
        "movs	r3, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #28]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[1] * B[6]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #24]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[2] * B[5]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #20]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[3] * B[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b], #16]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[4] * B[3]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[5] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[6] * B[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[7] * B[0]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #28]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "str	r4, [sp, #28]\n\t"
        "#  A[8] * B[0]\n\t"
        "movs	r4, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #32]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[7] * B[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #28]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[6] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[5] * B[3]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[4] * B[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b], #16]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[3] * B[5]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b], #20]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[2] * B[6]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #24]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[1] * B[7]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #28]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[0] * B[8]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #32]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "str	r5, [sp, #32]\n\t"
        "#  A[0] * B[9]\n\t"
        "movs	r5, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #36]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[1] * B[8]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #32]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[2] * B[7]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #28]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[3] * B[6]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b], #24]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[4] * B[5]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b], #20]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[5] * B[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "ldr	%[b], [%[b], #16]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[6] * B[3]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[7] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #28]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[8] * B[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #32]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[9] * B[0]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #36]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "str	r3, [sp, #36]\n\t"
        "#  A[10] * B[0]\n\t"
        "movs	r3, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #40]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[9] * B[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #36]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[8] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #32]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[7] * B[3]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #28]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[6] * B[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "ldr	%[b], [%[b], #16]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[5] * B[5]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "ldr	%[b], [%[b], #20]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[4] * B[6]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b], #24]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[3] * B[7]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b], #28]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[2] * B[8]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #32]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[1] * B[9]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #36]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[0] * B[10]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #40]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "str	r4, [sp, #40]\n\t"
        "#  A[0] * B[11]\n\t"
        "movs	r4, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a]]\n\t"
        "ldr	%[b], [%[b], #44]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[1] * B[10]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #40]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[2] * B[9]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #36]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[3] * B[8]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b], #32]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[4] * B[7]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b], #28]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[5] * B[6]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "ldr	%[b], [%[b], #24]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[6] * B[5]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "ldr	%[b], [%[b], #20]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[7] * B[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #28]\n\t"
        "ldr	%[b], [%[b], #16]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[8] * B[3]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #32]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[9] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #36]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[10] * B[1]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #40]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "#  A[11] * B[0]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #44]\n\t"
        "ldr	%[b], [%[b]]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r6\n\t"
#else
        "add	r5, r5, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, r7\n\t"
#elif defined(__clang__)
        "adcs	r3, r7\n\t"
#else
        "adc	r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
        "str	r5, [sp, #44]\n\t"
        "#  A[11] * B[1]\n\t"
        "movs	r5, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #44]\n\t"
        "ldr	%[b], [%[b], #4]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[10] * B[2]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #40]\n\t"
        "ldr	%[b], [%[b], #8]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[9] * B[3]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #36]\n\t"
        "ldr	%[b], [%[b], #12]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[8] * B[4]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #32]\n\t"
        "ldr	%[b], [%[b], #16]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[7] * B[5]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #28]\n\t"
        "ldr	%[b], [%[b], #20]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[6] * B[6]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #24]\n\t"
        "ldr	%[b], [%[b], #24]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[5] * B[7]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "ldr	%[b], [%[b], #28]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[4] * B[8]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b], #32]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[3] * B[9]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b], #36]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[2] * B[10]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #40]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "#  A[1] * B[11]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #4]\n\t"
        "ldr	%[b], [%[b], #44]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r7\n\t"
#else
        "add	r3, r3, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r4, %[r]\n\t"
#else
        "adc	r4, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r3, r3, r6\n\t"
#else
        "add	r3, r3, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r4, r4, r7\n\t"
#elif defined(__clang__)
        "adcs	r4, r7\n\t"
#else
        "adc	r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
        "mov	%[r], r8\n\t"
        "str	r3, [%[r], #48]\n\t"
        "movs	%[r], #0\n\t"
        "#  A[2] * B[11]\n\t"
        "movs	r3, #0\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #8]\n\t"
        "ldr	%[b], [%[b], #44]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[3] * B[10]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #12]\n\t"
        "ldr	%[b], [%[b], #40]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[4] * B[9]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #16]\n\t"
        "ldr	%[b], [%[b], #36]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "#  A[5] * B[8]\n\t"
        "mov	%[a], r9\n\t"
        "mov	%[b], r10\n\t"
        "ldr	%[a], [%[a], #20]\n\t"
        "ldr	%[b], [%[b], #32]\n\t"
        "uxth	r6, %[a]\n\t"
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r7\n\t"
#else
        "add	r4, r4, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r5, %[r]\n\t"
#else
        "adc	r5, %[r]\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, r6, #16\n\t"
#else
        "lsr	r7, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsls	r6, r6, #16\n\t"
#else
        "lsl	r6, r6, #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r4, r4, r6\n\t"
#else
        "add	r4, r4, r6\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r5, r5, r7\n\t"
#elif defined(__clang__)
        "adcs	r5, r7\n\t"
#else
        "adc	r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r6, %[a], #16\n\t"
#else
        "lsr	r6, %[a], #16\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "lsrs	r7, %[b], #16\n\t"
#else
        "lsr	r7, %[b], #16\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "muls	r7, r6, r7\n\t"
#elif defined(__clang__)
        "muls	r7, r6\n\t"
#else
        "mul	r7, r6\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "adds	r5, r5, r7\n\t"
#else
        "add	r5, r5, r7\n\t"
#endif
#ifdef WOLFSSL_KEIL
        "adcs	r3, r3, %[r]\n\t"
#elif defined(__clang__)
        "adcs	r3, %[r]\n\t"
#else
        "adc	r3, %[r]\n\t"
#endif
        "uxth	r7, %[b]\n\t"
#ifdef WOLFSSL_KEIL
        "muls	r6, r7, r6\n\t"
#elif defined(__clang__)
        "muls	r6, r7\n\t"
#else
        "mul	r6, r7\n\t"
#endif
#if defined(__clang__) || defined(WOLFSSL_KEIL)
        "