TinyCLR has a powerful feature called RLI (Runtime Loadable Interops). This is similar to the old RLP on NETMF, but on steroids! Why do you need this? Say you need to calculate a checksum or encrypt some data. Doing this in managed code is slow and has no benefit, as you never really need to step inside the core code. The better approach is to write those small but time-consuming tasks in native code. This is like writing the game logic in C# but the 3D engine in C++/Assembly. What a DLL is on the PC, RLI is on TinyCLR.
Allow me to show you some magic! Here is my XTEA crypto class:
(For the original code and details, see the XTEA article on Wikipedia.)
using System;
using System.Runtime.CompilerServices;
namespace Cipher {
    /// <summary>
    /// XTEA block cipher operating in place on one 64-bit block (two <c>uint</c> words)
    /// with a 128-bit key (four <c>uint</c> words).
    /// Each operation is offered twice: a managed implementation (<c>*Slow</c>) and an
    /// <c>InternalCall</c> declaration (<c>*Fast</c>) whose body is supplied by native code.
    /// </summary>
    static class Xtea {
        // Round constant added to (or subtracted from) the running sum once per round.
        private const uint Delta = 0x9E3779B9;

        /// <summary>
        /// Native counterpart of <see cref="EncipherSlow"/>. Declared as an internal call,
        /// so the implementation must be registered with the runtime before it is invoked.
        /// </summary>
        /// <param name="Rounds">Number of cipher rounds.</param>
        /// <param name="Data">Block to encipher.</param>
        /// <param name="Key">Cipher key.</param>
        [MethodImpl(MethodImplOptions.InternalCall)]
        public static extern void EncipherFast(uint Rounds, uint[] Data, uint[] Key);

        /// <summary>
        /// Enciphers <c>Data[0]</c> and <c>Data[1]</c> in place using managed code.
        /// </summary>
        /// <param name="Rounds">Number of cipher rounds; 0 leaves the block unchanged.</param>
        /// <param name="Data">Block to encipher; only elements 0 and 1 are read and written.</param>
        /// <param name="Key">Cipher key; elements 0 through 3 are used.</param>
        public static void EncipherSlow(uint Rounds, uint[] Data, uint[] Key) {
            // XTEA is defined over arithmetic modulo 2^32. Make the wrap-around explicit so
            // the routine still works when the project is compiled with overflow checking.
            unchecked {
                uint v0 = Data[0];
                uint v1 = Data[1];
                uint sum = 0;

                for (uint round = 0; round < Rounds; round++) {
                    v0 += (((v1 << 4) ^ (v1 >> 5)) + v1) ^ (sum + Key[sum & 3]);
                    sum += Delta;
                    v1 += (((v0 << 4) ^ (v0 >> 5)) + v0) ^ (sum + Key[(sum >> 11) & 3]);
                }

                Data[0] = v0;
                Data[1] = v1;
            }
        }

        /// <summary>
        /// Native counterpart of <see cref="DecipherSlow"/>. Declared as an internal call,
        /// so the implementation must be registered with the runtime before it is invoked.
        /// </summary>
        /// <param name="Rounds">Number of cipher rounds.</param>
        /// <param name="Data">Block to decipher.</param>
        /// <param name="Key">Cipher key.</param>
        [MethodImpl(MethodImplOptions.InternalCall)]
        public static extern void DecipherFast(uint Rounds, uint[] Data, uint[] Key);

        /// <summary>
        /// Deciphers <c>Data[0]</c> and <c>Data[1]</c> in place using managed code.
        /// Must be called with the same <paramref name="Rounds"/> and <paramref name="Key"/>
        /// that were used to encipher the block.
        /// </summary>
        /// <param name="Rounds">Number of cipher rounds; 0 leaves the block unchanged.</param>
        /// <param name="Data">Block to decipher; only elements 0 and 1 are read and written.</param>
        /// <param name="Key">Cipher key; elements 0 through 3 are used.</param>
        public static void DecipherSlow(uint Rounds, uint[] Data, uint[] Key) {
            // See EncipherSlow: all arithmetic here is intentionally modulo 2^32, including
            // the starting sum (Delta * Rounds), which overflows 32 bits for Rounds >= 2.
            unchecked {
                uint v0 = Data[0];
                uint v1 = Data[1];
                uint sum = Delta * Rounds;

                for (uint round = 0; round < Rounds; round++) {
                    v1 -= (((v0 << 4) ^ (v0 >> 5)) + v0) ^ (sum + Key[(sum >> 11) & 3]);
                    sum -= Delta;
                    v0 -= (((v1 << 4) ^ (v1 >> 5)) + v1) ^ (sum + Key[sum & 3]);
                }

                Data[0] = v0;
                Data[1] = v1;
            }
        }
    }
}
You can see that each function is available in two forms: one is a regular managed method and the other is an extern InternalCall declaration. Having both options allows me to test the native code and to compare speed.
The main program starts by loading the RLI region from a binary that was compiled separately and embedded as a resource. It then tests XTEA encoding/decoding in both managed and native code.
using System;
using System.Threading;
using System.Runtime.InteropServices;
using GHIElectronics.TinyCLR.Native;
using System.Diagnostics;
using GHIElectronics.TinyCLR.Devices.Gpio;
namespace InterTest {
    /// <summary>
    /// Demo program: loads a native interop image into RAM, registers it with the CLR,
    /// checks that the native and managed XTEA routines agree, then compares their speed.
    /// </summary>
    class Program {
        /// <summary>
        /// Toggles Led1 forever (100 ms high, 100 ms low). Never returns; intended to run
        /// on its own thread.
        /// </summary>
        static void Blinker() {
            const int HalfPeriodMs = 100;

            var led = GpioController.GetDefault().OpenPin(GHIElectronics.TinyCLR.Pins.FEZ.GpioPin.Led1);
            led.SetDriveMode(GpioPinDriveMode.Output);

            while (true) {
                led.Write(GpioPinValue.High);
                Thread.Sleep(HalfPeriodMs);
                led.Write(GpioPinValue.Low);
                Thread.Sleep(HalfPeriodMs);
            }
        }

        /// <summary>
        /// Formats the first two words of <paramref name="block"/> as upper-case hex,
        /// separated by a single space.
        /// </summary>
        private static string FormatBlock(uint[] block) {
            return block[0].ToString("X") + " " + block[1].ToString("X");
        }

        /// <summary>
        /// Entry point. Starts the LED blinker, installs the native interop, runs the XTEA
        /// cross-check and the speed comparison, then sleeps forever.
        /// </summary>
        static void Main() {
            new Thread(Blinker).Start();

            const uint RliAddress = 0x20016000;
            const uint Rounds = 32;
            // Single source of truth for the benchmark length, so the loops and the
            // messages that report the loop count cannot drift apart.
            const int Iterations = 1000;

            // Copy the native interop image from the resources to the RLI address.
            var interop = Resource.GetBytes(Resource.BinaryResources.Interop);
            Marshal.Copy(interop, 0, new IntPtr(RliAddress), interop.Length);

            // The managed copy is no longer needed; release it.
            interop = null;
            System.GC.Collect();

            // Let the CLR know about the interop table.
            // You normally need to look in the MAP file of the binary to find the location
            // of the table. In this example the "scatter file" was modified to position the
            // table at the base, so the base address of RLI (Runtime Loadable Interops) is
            // the same as the address of the interop table.
            Interop.Add(new IntPtr(RliAddress));

            // Test XTEA: alternate native and managed calls so each decodes the other's output.
            Debug.WriteLine(Environment.NewLine + "Test XTEA is working.");
            var data = new uint[2] { 0x12345678, 0x98765432 }; // 8-byte block
            var key = new uint[4] { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }; // 128-bit key
            Debug.WriteLine("Original Data : " + FormatBlock(data));

            Cipher.Xtea.EncipherFast(Rounds, data, key);
            Debug.WriteLine("Fast Encoded Data: " + FormatBlock(data));

            Cipher.Xtea.DecipherSlow(Rounds, data, key);
            Debug.WriteLine("Slow Decoded Data: " + FormatBlock(data));

            Cipher.Xtea.EncipherSlow(Rounds, data, key);
            Debug.WriteLine("Slow Encoded Data: " + FormatBlock(data));

            Cipher.Xtea.DecipherFast(Rounds, data, key);
            Debug.WriteLine("Fast Decoded Data: " + FormatBlock(data));

            // Compare speed. UtcNow is used for the timestamps because only the difference
            // matters and UTC is not affected by local-time adjustments.
            Debug.WriteLine(Environment.NewLine + "Speed Test for " + Iterations + " native enc/dec loops.");
            var start = DateTime.UtcNow;
            for (var i = 0; i < Iterations; i++) {
                Cipher.Xtea.EncipherFast(Rounds, data, key);
                Cipher.Xtea.DecipherFast(Rounds, data, key);
            }
            var nativeElapsed = DateTime.UtcNow - start;
            Debug.WriteLine("Time -> " + (nativeElapsed.TotalMilliseconds / 1000).ToString("F2") + " Seconds.");

            Debug.WriteLine("Speed Test for " + Iterations + " managed enc/dec loops.");
            start = DateTime.UtcNow;
            for (var i = 0; i < Iterations; i++) {
                Cipher.Xtea.EncipherSlow(Rounds, data, key);
                Cipher.Xtea.DecipherSlow(Rounds, data, key);
            }
            var managedElapsed = DateTime.UtcNow - start;
            Debug.WriteLine("Time -> " + (managedElapsed.TotalMilliseconds / 1000).ToString("F2") + " Seconds.");

            var speedup = managedElapsed.TotalMilliseconds / nativeElapsed.TotalMilliseconds;
            Debug.WriteLine(Environment.NewLine + "Native was " + speedup.ToString("F0") + " times faster than managed.");

            // Keep the main thread alive so the program does not exit.
            Thread.Sleep(Timeout.Infinite);
        }
    }
}
When running the program on FEZ, here is what I get back:
Test XTEA is working.
Original Data : 12345678 98765432
Fast Encoded Data: 3C07D596 56515071
Slow Decoded Data: 12345678 98765432
Slow Encoded Data: 3C07D596 56515071
Fast Decoded Data: 12345678 98765432
Speed Test for 1000 native enc/dec loops.
Time -> 0.09 Seconds.
Speed Test for 1000 managed enc/dec loops.
Time -> 3.49 Seconds.
Native was 39 times faster than managed.
Almost 40 times faster, and you can do that in minutes! Now, to the coolest part: how do you implement and compile the native code? You can actually do this right in Visual Studio, without installing any other software, and enjoy the beauty of IntelliSense!
Well, I love teasing! You will have to come back to see how that is done.