SHA256

Now that I had a ServerHello, which contains the cipher suite to be used in the key exchange, I could start on serious algorithms instead of tedious data parsing. I sent different cipher suites with ClientHello to different servers and found that only a few of them are actually used, such as

  • TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256
  • TLS_RSA_WITH_AES_128_GCM_SHA256

I first took a look at the famous RSA algorithm, which requires big-integer operations. I don’t know which big-integer library is best for my C++ project. In Google’s foobar challenges, I used the standard BigInteger class in Java. It was easy to use and caused no problems. I don’t know why there is no big-integer library in the C++ standard library or in Boost. (Boost does have GMP wrappers, though!)

For now, I gave up on RSA just for that reason and instead looked for simpler algorithms to implement. SHA seemed like a good one and is used everywhere. By just implementing the pseudocode from Wikipedia, I could run it successfully, although at first the result didn’t match other implementations until I found that the length field is expressed in bits, not in bytes!

The algorithm itself is pretty straightforward. It only involves mixing data by rotating, shifting, XOR-ing and adding 32-bit values. Implementing this for an FPGA or ASIC should be easy, too. The family of SHA algorithms is widely used for cryptocurrencies. Faster execution means more money for coin miners. I wonder how much energy is wasted on a global scale just for executing this algorithm.

Below is a code snippet of my SHA256 implementation.


// Sequence of uint words (the sha256 routine uses it for the message schedule).
using vword = std::vector<uint>;
 
/* Right-rotates the 32-bit value n by d bit positions.
 *
 * The rotation count is reduced modulo 32, so d == 0 and d >= 32 are well
 * defined. Without the masking, d == 0 would evaluate n << 32, a shift by
 * the full width of the type, which is undefined behaviour.
 *
 * Assumes uint is a 32-bit unsigned type. */
inline uint rotr(uint n, uint d)
{
    d &= 31;
    // (32 - d) & 31 maps d == 0 to a left shift of 0 instead of 32.
    return (n >> d) | (n << ((32 - d) & 31));
}

/* Returns n shifted right by d bit positions (plain n >> d, no rotation). */
inline uint shiftr(uint n, uint d)
{
    const uint shifted = n >> d;
    return shifted;
}

/**
 * Computes the SHA-256 digest of a byte buffer.
 *
 * @param _input  Pointer to the message bytes. The buffer is only read.
 *                A null pointer is treated as an empty message.
 * @param len     Number of message bytes at _input. Values below zero are
 *                treated as an empty message.
 * @return        The 32-byte digest: h0..h7, each written most significant
 *                byte first.
 *
 * Every byte <-> word conversion is done with explicit shifts, so the result
 * does not depend on the byte order of the host and no htonl/ntohl calls or
 * pointer casts are needed.
 *
 * Assumes uint is a 32-bit unsigned type (all additions wrap modulo 2^32).
 */
vchar sha256(uchar *_input, int len)
{
    // Initial hash state h0..h7.
    uint _h[] = {   0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
                    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
    // Round constants k[0..63]; never modified, so built only once.
    static const uint _k[] = {
                0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
                0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
                0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
                0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
                0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
                0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
                0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
                0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2};

    // A missing buffer or a negative length cannot describe any bytes;
    // hash the empty message instead of building a vector from a bad range.
    if (_input == 0 || len < 0)
        len = 0;
    const uint msg_len = static_cast<uint>(len);

    // --- Padding: message || 0x80 || zeros || 64-bit big-endian bit length ---
    vchar input(_input, _input + len);
    input.push_back(0x80); // a single 1 bit followed by seven 0 bits

    // Number of zero bytes so that (len + 1 + zeros) % 64 == 56, leaving
    // exactly 8 bytes for the length field. Computed in unsigned arithmetic
    // so it cannot overflow for len close to INT_MAX.
    const uint zeros = (120u - (msg_len + 1u) % 64u) % 64u;
    for (uint i = 0; i < zeros; i++)
        input.push_back(0);

    // The length field holds the message length in BITS (len * 8) as a
    // 64-bit big-endian integer. It is split into two 32-bit halves so that
    // no bits are lost for len >= 2^29 and no signed overflow can occur.
    const uint bits_hi = msg_len >> 29;
    const uint bits_lo = msg_len << 3;
    for (int shift = 24; shift >= 0; shift -= 8)
        input.push_back(static_cast<uchar>((bits_hi >> shift) & 0xff));
    for (int shift = 24; shift >= 0; shift -= 8)
        input.push_back(static_cast<uchar>((bits_lo >> shift) & 0xff));

    const std::size_t total = input.size(); // always a multiple of 64

    uint w[64]; // message schedule, reused for every block

    for (std::size_t chu = 0; chu < total; chu += 64)
    {
        // The first 16 schedule words are the block's 64 bytes read as
        // big-endian 32-bit words.
        for (int i = 0; i < 16; i++)
        {
            const std::size_t at = chu + static_cast<std::size_t>(i) * 4;
            w[i] = (static_cast<uint>(static_cast<uchar>(input[at]))     << 24)
                 | (static_cast<uint>(static_cast<uchar>(input[at + 1])) << 16)
                 | (static_cast<uint>(static_cast<uchar>(input[at + 2])) <<  8)
                 |  static_cast<uint>(static_cast<uchar>(input[at + 3]));
        }

        // Extend the 16 words into the remaining 48 schedule words.
        for (int i = 16; i < 64; i++)
        {
            uint s0 = rotr(w[i-15], 7) ^ rotr(w[i-15], 18) ^ shiftr(w[i-15], 3);
            uint s1 = rotr(w[i-2], 17) ^ rotr(w[i-2], 19) ^ shiftr(w[i-2], 10);
            w[i] = w[i-16] + s0 + w[i-7] + s1;
        }

        // Working variables start from the current hash state.
        uint a = _h[0];
        uint b = _h[1];
        uint c = _h[2];
        uint d = _h[3];
        uint e = _h[4];
        uint f = _h[5];
        uint g = _h[6];
        uint h = _h[7];

        // Compression: 64 rounds.
        for (int i = 0; i < 64; i++)
        {
            uint S1 = rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25);
            uint ch = (e & f) ^ ((~e) & g);
            uint temp1 = h + S1 + ch + _k[i] + w[i];
            uint S0 = rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22);
            uint maj = (a & b) ^ (a & c) ^ (b & c);
            uint temp2 = S0 + maj;

            h = g;
            g = f;
            f = e;
            e = d + temp1;
            d = c;
            c = b;
            b = a;
            a = temp1 + temp2;
        }

        // Add the compressed block to the running hash state.
        _h[0] += a;
        _h[1] += b;
        _h[2] += c;
        _h[3] += d;
        _h[4] += e;
        _h[5] += f;
        _h[6] += g;
        _h[7] += h;
    }

    // digest := h0 || h1 || ... || h7, each word most significant byte first.
    vchar digest;
    for (int i = 0; i < 8; i++)
    {
        digest.push_back(static_cast<uchar>((_h[i] >> 24) & 0xff));
        digest.push_back(static_cast<uchar>((_h[i] >> 16) & 0xff));
        digest.push_back(static_cast<uchar>((_h[i] >>  8) & 0xff));
        digest.push_back(static_cast<uchar>( _h[i]        & 0xff));
    }
    return digest;
}

Leave a Reply

Your email address will not be published. Required fields are marked *