Speed comparison

I was curious to find out how fast the G80 performs compared to some other boards GHI is offering. I used @ Duke Nukem's code, slightly modified, and ran it on NETMF 4.3.7.8. Here are the results:

G80: 00:03:28.0820210
G120: 00:13:00.3519469
G400: 00:01:31.4454191
Hydra+: 00:05:49.8682380
Cerb40 II: 00:03:30.9686550

G400 is still the king and G120 does not look good :cry:
G120 and G400 are the only two SoCs that offer all the goodies from GHI, however, the performance gap is huge.

Code (this website distorts my code, here is original - http://pastebin.com/Dzm9ReER):

/// <summary>
/// CPU benchmark: computes decimal digits of PI nine at a time with a
/// digit-extraction algorithm built on integer modular arithmetic, and
/// reports the elapsed time through Debug.Print.
/// </summary>
public class Program
{
	// Number of decimal digits produced by a single CalculatePiDigits call.
	private const int DigitsPerChunk = 9;

	/// <summary>
	/// Entry point: runs the benchmark for 200 digits and prints the result.
	/// </summary>
	public static void Main()
	{
		Debug.Print("Program Started");
		string x = Process(200); // calculate 200 digits of PI
		Debug.Print("PI=" + x);
	}

	/*     
	 * Original code: http://omegacoder.com/?p=91
	 * Adapted code: https://www.ghielectronics.com/community/codeshare/entry/597
	 */

	/// <summary>
	/// Builds the string "3." followed by the requested number of decimal
	/// digits of PI and prints the elapsed time via Debug.Print.
	/// </summary>
	/// <param name="digits">Number of digits after the decimal point; zero or
	/// a negative value yields just "3.".</param>
	/// <returns>PI as text with <paramref name="digits"/> decimals.</returns>
	public static string Process(int digits)
	{
		var result = new StringBuilder();
		result.Append("3.");

		// UTC is immune to local-time shifts, so the measured duration is not distorted.
		DateTime startTime = DateTime.UtcNow;

		for (int i = 0; i < digits; i += DigitsPerChunk)
		{
			// CalculatePiDigits always returns exactly DigitsPerChunk characters
			// (zero-padded), so no further formatting is needed here.
			string chunk = CalculatePiDigits(i + 1);

			// The last chunk may be only partially needed.
			int digitCount = Math.Min(digits - i, DigitsPerChunk);
			result.Append(chunk.Substring(0, digitCount));
		}

		TimeSpan duration = DateTime.UtcNow.Subtract(startTime);
		Debug.Print("Time elapsed: " + duration.ToString());
		return result.ToString();
	}

	// return (a * b) mod m; the product is widened to long so it cannot overflow
	private static int mul_mod(int a, int b, int m)
	{
		return (int)((a * (long)b) % m);
	}

	// return the inverse of x mod y (extended Euclid); x must be non-zero,
	// the result is normalized into the range [0, y)
	private static int inv_mod(int x, int y)
	{
		int q, u, v, a, c, t;
		u = x;
		v = y;
		c = 1;
		a = 0;
		do
		{
			q = v / u;
			t = c;
			c = a - q * c;
			a = t;
			t = u;
			u = v - q * u;
			v = t;
		} while (u != 0);
		a = a % y;
		if (a < 0)
		{
			a = y + a;
		}
		return a;
	}

	// return (a^b) mod m using square-and-multiply
	private static int pow_mod(int a, int b, int m)
	{
		int r, aa;
		r = 1;
		aa = a;
		while (true)
		{
			if ((b & 1) != 0)
			{
				r = mul_mod(r, aa, m);
			}
			b = b >> 1;
			if (b == 0)
			{
				break;
			}
			aa = mul_mod(aa, aa, m);
		}
		return r;
	}

	// return true if n is prime (trial division by odd numbers up to sqrt(n))
	private static bool is_prime(int n)
	{
		if (n < 2)
		{
			return false; // 0, 1 and negative numbers are not prime
		}
		if (n == 2)
		{
			return true; // the only even prime
		}
		if ((n % 2) == 0)
		{
			return false;
		}
		var r = (int)Math.Sqrt(n);
		for (int i = 3; i <= r; i += 2)
		{
			if ((n % i) == 0)
			{
				return false;
			}
		}
		return true;
	}

	// return the prime number immediately after n
	private static int next_prime(int n)
	{
		do
		{
			n++;
		} while (!is_prime(n));
		return n;
	}

	// left-pad a digit string with zeros up to DigitsPerChunk characters
	private static string PadChunk(string digits)
	{
		if (digits.Length >= DigitsPerChunk)
		{
			return digits;
		}
		return new string('0', DigitsPerChunk - digits.Length) + digits;
	}

	/// <summary>
	/// Computes nine consecutive decimal digits of PI.
	/// </summary>
	/// <param name="n">1-based position (after the decimal point) of the first
	/// digit to return.</param>
	/// <returns>Exactly nine characters, zero-padded on the left.</returns>
	private static string CalculatePiDigits(int n)
	{
		int av, vmax, num, den, s, t;
		var N = (int)((n + 20) * Math.Log(10) / Math.Log(2));
		double logTwoN = Math.Log(2 * N); // loop-invariant, hoisted out of the prime loop
		double sum = 0;

		// Accumulate the contribution of every odd prime a <= 2N.
		for (int a = 3; a <= (2 * N); a = next_prime(a))
		{
			// av = a^vmax, the largest power of a not exceeding 2N (up to rounding).
			vmax = (int)(logTwoN / Math.Log(a));
			av = 1;
			for (int i = 0; i < vmax; i++)
			{
				av = av * a;
			}
			s = 0;
			num = 1;
			den = 1;
			int v = 0;   // net power of a currently factored out of num/den
			int kq = 1;  // counter that trips when k is a multiple of a
			int kq2 = 1; // counter that trips when 2k-1 is a multiple of a
			for (int k = 1; k <= N; k++)
			{
				t = k;
				if (kq >= a)
				{
					// Strip all factors of a from k.
					do
					{
						t = t / a;
						v--;
					} while ((t % a) == 0);
					kq = 0;
				}
				kq++;
				num = mul_mod(num, t, av);
				t = 2 * k - 1;
				if (kq2 >= a)
				{
					if (kq2 == a)
					{
						// Strip all factors of a from 2k-1.
						do
						{
							t = t / a;
							v++;
						} while ((t % a) == 0);
					}
					kq2 -= a;
				}
				den = mul_mod(den, t, av);
				kq2 += 2;
				if (v > 0)
				{
					t = inv_mod(den, av);
					t = mul_mod(t, num, av);
					t = mul_mod(t, k, av);
					for (int i = v; i < vmax; i++)
					{
						t = mul_mod(t, a, av);
					}
					s += t;
					if (s >= av)
					{
						s -= av;
					}
				}
			}
			t = pow_mod(10, n - 1, av);
			s = mul_mod(s, t, av);
			// Keep only the fractional part of the running sum.
			sum = (sum + s / (double)av) % 1.0;
		}

		// The first nine fractional digits of sum are the requested digits.
		int chunkValue = (int)(sum * 1e9);
		return PadChunk(chunkValue.ToString());
	}
}
2 Likes

The moral here is that the STM32 is a very high-performing chip. Now that the new ones support DRAM, a case could be made to move all the systems over to it. It would simplify GHI’s development significantly, I would guess.

I think it’d be interesting to see a performance-per-watt comparison as well, my guess is that the G400 (and Hydra) would show poorly in a comparison like that, just because of the extra hardware.

With FPU calculations, probably even G400 would fall…

A fundamental thing to remember here is that on-chip SRAM will always be both more power-efficient and much higher performing than external DRAM.

External SRAM (which is supported by STM32, and probably the NXP chips) is expensive but faster. How it would compare to internal SRAM, I couldn’t say.

Let’s see…

G80: 00:03:28.0697560
G120: 00:10:41.8318678
G400: 00:01:19.3231995
Hydra+: 00:02:44.6428838
Cerb40 II: 00:03:49.9262320

Code:

' Double-precision floating-point benchmark.
' Repeatedly divides and then multiplies a Double by the same constant and
' prints the elapsed time and the final value of f via Debug.Print.
Sub Main()
	Dim dStart, dEnd As DateTime
	' The R suffix makes this a Double literal, so f is inferred as Double.
	Dim f = 987654321.12345684R

	Debug.Print("Program started")
	dStart = DateTime.Now
	' Each outer iteration performs 100 divisions followed by 100 multiplications.
	For i = 1 To 135347
		For ii = 1 To 100
			f /= 1.0987654321R
		Next
		For ii = 1 To 100
			f *= 1.0987654321R
		Next
	Next

	dEnd = DateTime.Now
	Debug.Print("Time elapsed: " & dEnd.Subtract(dStart).ToString())
	' Printing f shows how far rounding error moved it from the start value.
	Debug.Print("f=" & f.ToString())
End Sub

@ iamin - That’s double precision numbers. CortexM4 only has single precision FPU. Would you please redo the test? :slight_smile:

Well, here you go.

G80: 00:03:28.0675570
G120: 00:11:25.1713040
G400: 00:01:20.9176324
Hydra+: 00:02:44.9115418
Cerb40 II: 00:03:36.6914380

Code:

' Single-precision floating-point benchmark.
' Repeatedly divides, multiplies, subtracts and adds the same constant on a
' Single and prints the elapsed time and the final value of f via Debug.Print.
Sub Main()
	Dim dStart, dEnd As DateTime
	' The F suffix makes this a Single literal, so f is inferred as Single.
	Dim f = 98765.43F

	Debug.Print("Program started")
	dStart = DateTime.Now
	' Each outer iteration performs 50 operations of each kind: /, *, -, +.
	For i = 1 To 155833
		For ii = 1 To 50
			f /= 1.09876537F
		Next
		For ii = 1 To 50
			f *= 1.09876537F
		Next
		For ii = 1 To 50
			f -= 1.09876537F
		Next
		For ii = 1 To 50
			f += 1.09876537F
		Next
	Next

	dEnd = DateTime.Now
	Debug.Print("Time elapsed: " & dEnd.Subtract(dStart).ToString())
	' Printing f shows how far rounding error moved it from the start value.
	Debug.Print("f=" & f.ToString())
End Sub

Could anyone be kind enough to repeat these three tests on Panda III?

@ iamin - Panda III is G80. What did you use to test G80?

@ Architect - I know that Panda III is based on G80. I have used G80 Dev Board. I have results from Quail mainboard and they look very different compared to my G80 results, so I am not sure what is going on. Hence I have asked to double check, “just in case”.

Panda III Results

Test 1 -
Time elapsed: 00:02:37.2292040
PI=3.1415926535897932384626433832795028841971693993751058209749…294895493038196

Test 2 -
Time elapsed: 00:03:28.0716020
f=987654321.12345648

Test 3 -
Time elapsed: 00:03:28.0689920
f=98765.4531

1 Like

I redid the first test and I still get the same time frame as before (that’s almost 1 minute difference!). Very strange…

Oxygen running a 411 @ 96Mhz

Program Started
Time elapsed: 00:04:01.5554790
PI=3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342
1170679821480865132823066470938446095505822317253594081284811174502841027019385211
0555964462294895493038196

Pussie :smiley:

1 Like

@ Bec a Fuel - Meow :smiley:

I think the result posted in #4 is more accurate; there, G80 is about 7% faster than Cerb40:

G80: 00:03:28.0697560
G120: 00:10:41.8318678
G400: 00:01:19.3231995
Hydra+: 00:02:44.6428838
Cerb40 II: 00:03:49.9262320

So 3:49 and 3:28 seems 7% different. :smiley:

@ Dat - The problem here is that STM32F427 on three different boards has shown three very different (~30%) performances.

Strange. Single precision calculation has the same performance as double precision one. I guess FPU is not enabled on G80?..

What three boards and what speeds?

@ Gus - Look at the table attached.