SlideShare a Scribd company logo
1 of 45
How to add an optimization for C#
to JIT compiler
@EgorBo
Engineer at Microsoft
RyuJIT
From C# to machine code
2
C# IL IR ASM
float y = x / 2;
float y = x * 0.5f;
Let’s start with a simple optimization
3
vdivss (Latency: 20, R.Throughput: 14)
vmulss (Latency: 5, R.Throughput: 0.5)
^ for my MacBook (Haswell)
X / C
4
double y = x / 2;
float y = x / 2;
double y = x / 10;
double y = x / 8;
double y = x / -0.5;
float x = 48.665f;
Console.WriteLine(x / 10f); // 4.8665
Console.WriteLine(x * 0.1f); // 4.8665004
= x * 0.5
= x * 0.5f
= x * 0.1
= x * 0.125
= x * -2
X / C – let me optimize it in Roslyn!
5
static float GetSpeed(float distance, float time)
{
return distance / time;
}
...
float speed = GetSpeed(distance, 2);
Does Roslyn see “X/C” here?
NO! It doesn’t inline methods
Where to implement my custom optimization?
6
• Roslyn
+ No time constraints
+ It’s written in C# - easy to add optimizations, easy to debug and experiment
- No cross-assembly optimizations
- No CPU-dependent optimizations (IL is cross-platform)
- Doesn’t know what the code will look like after inlining, CSE, loop optimizations, etc.
- F# doesn’t use Roslyn
• JIT
+ Inlining, CSE, Loop opts, etc phases create more opportunities for optimizations
+ Knows everything about target platform, CPU capabilities
- Written in C++, difficult to experiment
- Time constraints for optimizations (probably not that important with Tiering)
• R2R (AOT)
+ No time constraints (some optimizations are really time consuming, e.g. full escape analysis)
- No CPU-dependent optimizations
- Will be most likely re-jitted anyway?
• ILLink Custom Step
+ Cross-assembly IL optimizations
+ Written in C#
+ We can manually de-virtualize types/methods/calls (if we know what we are doing)
- Still no inlining, CSE, etc..
RyuJIT: phases
7
RyuJIT: where to morph my X/C?
8
GenTree
GenTreeStmt
GenTree
(GenTreeOp)
BasicBlock
GenTree
(GenTreeDblCon)
GenTree
(GenTreeDblCon)
GenTree
(GenTreeCall)
GenTree
(GenTreeLclVar)
static float Test(float x)
{
return Foo(x, 3.14) * 2;
}
Compiler
MUL
2.0
3.14x
Foo
Dump IR via COMPlus_JitDump
Back to X/C: Morph IR Tree
static float Calculate(float distance, float v0)
{
return distance / 2 + v0;
}
ADD
DIV
LCL_VAR
(v0)
CNS_DBL
(2.0)
LCL_VAR
(distance)
ADD
MUL
LCL_VAR
(v0)
CNS_DBL
(0.5)
LCL_VAR
(distance)
Morph
Implementing the opt in morph.cpp
DIV
op2op1
Inspired by GT_ROL
13
/// <summary>
/// Rotates the specified value left by the specified number of bits.
/// </summary>
public static uint RotateLeft(uint value, int offset)
=> (value << offset) | (value >> (32 - offset));
OR ROL
/  / 
/  / 
LSH RSZ => x y
/  / 
x AND x AND
/  / 
y 31 ADD 31
/ 
NEG 32
|
y
rol eax, cl
15
"Hello".Length -> 5
static void Append(string str)
{
if (str.Length <= 2)
QuickAppend(str);
else
SlowAppend(str);
}
...
builder.Append("/>");
builder.QuickAppend("/>");
Inline, remove if (2 <= 2)
16
"Hello".Length => 5
ARR_LENGTH
CNS_STR
(ScpHnd, SconCPX)
CNS_INT
(5)
VM
case GT_ARR_LENGTH:
{
if (op1->OperIs(GT_CNS_STR))
{
GenTreeStrCon* strCon = op1->AsStrCon();
int len = info.compCompHnd->getStringLength(
strCon->gtScpHnd, strCon->gtSconCPX);
return gtNewIconNode(len);
}
break;
}
JIT <-> VM
Interface
op1
Access VM’s data from JIT
17
bool Test1(int x) => x % 4 == 0; bool Test1(int x) => (x & 3) == 0;
MOD
EQ/NE
CNS_INT
(0)
CNS_INT
(4)
anything
op1 op2
op2op1
AND
EQ/NE
CNS_INT
(0)
CNS_INT
(3)
anything
op1 op2
op2op1
Roslyn and “!=“ operator
(unexpected optimization)
18
public static bool Test1(int x) => x != 42;
public static bool Test2(int x) => x != 0;
IL_0000: ldarg.0
IL_0001: ldc.i4.42
IL_0002: ceq
IL_0004: ldc.i4.0
IL_0005: ceq
IL_0007: ret
IL_0000: ldarg.0
IL_0001: ldc.i4.0
IL_0002: cgt.un
IL_0004: ret
return (uint)x > 0
return (x == 42) == false
JIT: ok, it’s GT_NE
JIT: what?.. ok, GT_GT
19
Math.Pow(x, 2)
Math.Pow(x, 1)
Math.Pow(x, 4)
Math.Pow(x, -1)
// can be added:
Math.Pow(42, 3)
Math.Pow(1, x)
Math.Pow(2, x)
Math.Pow(x, 0)
Math.Pow(x, 0.5)
| x * x
| x
| x * x * x * x
| 1 / x
| 74088
| 1
| exp2(x)
| 1
| sqrt(x)
Range check elimination
20
Range check elimination
21
public static void Test(int[] a)
{
a[0] = 4;
a[1] = 2;
for (int i = 0; i < a.Length; i++)
{
a[i] = 0;
}
a[1] = 2;
}
Range check elimination
22
public static void Test(int[] a)
{
if (a.Length <= 0) throw new IndexOutOfRangeException();
a[0] = 4;
if (a.Length <= 1) throw new IndexOutOfRangeException();
a[1] = 2;
for (int i = 0; i < a.Length; i++)
{
if (a.Length <= i) throw new IndexOutOfRangeException();
a[i] = 0;
}
if (a.Length <= 1) throw new IndexOutOfRangeException();
a[1] = 2;
}
Range check elimination
23
public static void Test(int[] a)
{
if (a.Length <= 0) throw new IndexOutOfRangeException();
a[0] = 4;
if (a.Length <= 1) throw new IndexOutOfRangeException();
a[1] = 2;
for (int i = 0; i < a.Length; i++)
{
if (a.Length <= i) throw new IndexOutOfRangeException();
a[i] = 0;
}
if (a.Length <= 1) throw new IndexOutOfRangeException();
a[1] = 2;
}
Range check elimination
24
public static void Test(int[] a)
{
if (a.Length <= 1) throw new IndexOutOfRangeException();
a[1] = 2;
if (a.Length <= 1) throw new IndexOutOfRangeException();
a[0] = 4;
for (int i = 0; i < a.Length; i++)
{
if (a.Length <= i) throw new IndexOutOfRangeException();
a[i] = 0;
}
if (a.Length <= 1) throw new IndexOutOfRangeException();
a[1] = 2;
}
Range check elimination
25
public static void Test(int[] a)
{
if (a.Length <= 1) throw new IndexOutOfRangeException();
a[1] = 2;
a[0] = 4;
for (int i = 0; i < a.Length; i++)
{
a[i] = 0;
}
a[1] = 2;
}
rangecheck.cpp (simplified)
26
void RangeCheck::OptimizeRangeCheck(GenTreeBoundsChk* bndsChk)
{
// Get the range for this index.
Range range = GetRange(...);
// If upper or lower limit is unknown, then return.
if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown())
{
return;
}
// Is the range between the lower and upper bound values.
if (BetweenBounds(range, 0, bndsChk->gtArrLen))
{
m_pCompiler->optRemoveRangeCheck(treeParent, stmt);
}
return;
}
rangecheck.cpp (simplified)
27
void RangeCheck::OptimizeRangeCheck(GenTreeBoundsChk* bndsChk)
{
// Get the range for this index.
Range range = GetRange(...);
// If upper or lower limit is unknown, then return.
if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown())
{
return;
}
// Is the range between the lower and upper bound values.
if (BetweenBounds(range, 0, bndsChk->gtArrLen))
{
m_pCompiler->optRemoveRangeCheck(treeParent, stmt);
}
return;
}
rangecheck.cpp (simplified)
28
void RangeCheck::OptimizeRangeCheck(GenTreeBoundsChk* bndsChk)
{
// Get the range for this index.
Range range = GetRange(...);
// If upper or lower limit is unknown, then return.
if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown())
{
return;
}
// Is the range between the lower and upper bound values.
if (BetweenBounds(range, 0, bndsChk->gtArrLen))
{
m_pCompiler->optRemoveRangeCheck(treeParent, stmt);
}
return;
}
Byte array
29
private static readonly byte[] _data =
new byte[256] { 1, 2, 3, … };
public static byte GetByte(int i)
{
return _data[i];
}
Byte array: Roslyn hack (new feature)
30
private static ReadOnlySpan<byte> _data =>
new byte[256] { 1, 2, 3, … };
public static byte GetByte(int i)
{
return _data[i];
}
256
Byte array: byte index (my PR)
31
private static ReadOnlySpan<byte> _data =>
new byte[256] { 1, 2, 3, … };
public static byte GetByte(int i)
{
return _data[(byte)i];
}
Byte indexer will never go out of bounds!
rangecheck.cpp (simplified)
32
void RangeCheck::OptimizeRangeCheck(GenTreeBoundsChk* bndsChk)
{
// Get the range for this index.
Range range = GetRange(...);
// If upper or lower limit is unknown, then return.
if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown())
{
return;
}
// Is the range between the lower and upper bound values.
if (BetweenBounds(range, 0, bndsChk->gtArrLen))
{
m_pCompiler->optRemoveRangeCheck(treeParent, stmt);
}
return;
}
[Byte.MinValue ... Byte.MaxValue]
ArrLen = 256
Loop optimizations
33
Loop Invariant Code Hoisting
34
public static bool Test(int[] a, int c)
{
for (int i = 0; i < a.Length; i++)
{
if (a[i] == c + 44)
return false;
}
return true;
}
Loop Invariant Code Hoisting
35
public static bool Test(int[] a, int c)
{
int tmp = c + 44;
for (int i = 0; i < a.Length; i++)
{
if (a[i] == tmp)
return false;
}
return true;
}
NYI: Loop-unrolling
36
public static int Test(int[] a)
{
int sum = 0;
for (int i = 0; i < a.Length; i++)
{
sum += a[i];
}
return sum;
}
NYI: Loop-unrolling
37
public static int Test(int[] a)
{
int sum = 0;
for (int i = 0; i < a.Length - 3; i += 4)
{
sum += a[i];
sum += a[i+1];
sum += a[i+2];
sum += a[i+3];
}
return sum;
}
NYI: Loop-unswitch
38
public static int Test(int[] a, bool condition)
{
int agr = 0;
for (int i = 0; i < a.Length; i++)
{
if (condition)
agr += a[i];
else
agr *= a[i];
}
return agr;
}
NYI: Loop-unswitch
39
public static int Test (int[] a, bool condition)
{
int agr = 0;
if (condition)
for (int i = 0; i < a.Length; i++)
agr += a[i];
else
for (int i = 0; i < a.Length; i++)
agr *= a[i];
return agr;
}
NYI: Loop-deletion
40
public static void Test()
{
for (int i = 0; i < 10; i++) { }
}
; Method Program:Test()
G_M44187_IG01:
G_M44187_IG02:
xor eax, eax
G_M44187_IG03:
inc eax
cmp eax, 10
jl SHORT G_M44187_IG03
G_M44187_IG04:
ret
; Total bytes of code: 10
NYI: Loop-deletion
41
public static void Test()
{
}
; Method Program:Test()
ret
; Total bytes of code: 1
NYI: InductiveRangeCheckElimination
42
public static void Zero1000Elements(int[] array)
{
for (int i = 0; i < 1000; i++)
array[i] = 0; // bound checks will be inserted here
}
NYI: InductiveRangeCheckElimination
43
public static void Zero1000Elements(int[] array)
{
int limit = Math.Min(array.Length, 1000);
for (int i = 0; i < limit; i++)
array[i] = 0; // bound checks are not needed here!
for (int i = limit; i < 1000; i++)
array[i] = 0; // bound checks are needed here
// so at least we could "zero" first `limit` elements without bound checks
}
NOTE: this LLVM optimization pass is not enabled by default in `opt –O2`.
Contributed by Azul developers (LLVM for JVM)
NYI: InductiveRangeCheckElimination
44
public static void Zero1000Elements(int[] array)
{
int limit = Math.Min(array.Length, 1000);
for (int i = 0; i < limit - 3; i += 4)
{
array[i] = 0;
array[i+1] = 0;
array[i+2] = 0;
array[i+3] = 0;
}
for (int i = limit; i < 1000; i++)
array[i] = 0; // bound checks are needed here
// so at least we could "zero" first `limit` elements without bound checks
}
Now we can even unroll the first loop!
NYI: InductiveRangeCheckElimination
45
public static void Zero1000Elements(int[] array)
{
int limit = Math.Min(array.Length, 1000);
memset(array, 0, limit);
for (int i = limit; i < 1000; i++)
array[i] = 0; // bound checks are needed here
// so at least we could "zero" first `limit` elements without bound checks
}
Or just replace with memset call
Q&A
Twitter: EgorBo

More Related Content

What's hot

CSharp Presentation
CSharp PresentationCSharp Presentation
CSharp PresentationVishwa Mohan
 
Programmable logic devices
Programmable logic devicesProgrammable logic devices
Programmable logic devicesAmmara Javed
 
Visual studio code
Visual studio codeVisual studio code
Visual studio codefizmhd
 
Embedded Systems: Lecture 13: Introduction to GNU Toolchain (Build Tools)
Embedded Systems: Lecture 13: Introduction to GNU Toolchain (Build Tools)Embedded Systems: Lecture 13: Introduction to GNU Toolchain (Build Tools)
Embedded Systems: Lecture 13: Introduction to GNU Toolchain (Build Tools)Ahmed El-Arabawy
 
Runtime Symbol Resolution
Runtime Symbol ResolutionRuntime Symbol Resolution
Runtime Symbol ResolutionKen Kawamoto
 
State of the Art OpenGL and Qt
State of the Art OpenGL and QtState of the Art OpenGL and Qt
State of the Art OpenGL and QtICS
 
Null safety in dart and flutter , the whole story!
Null safety in dart and flutter , the whole story!Null safety in dart and flutter , the whole story!
Null safety in dart and flutter , the whole story!Ahmed Abu Eldahab
 
2CPP14 - Abstraction
2CPP14 - Abstraction2CPP14 - Abstraction
2CPP14 - AbstractionMichael Heron
 
Embedded Systems: Lecture 8: Lab 1: Building a Raspberry Pi Based WiFi AP
Embedded Systems: Lecture 8: Lab 1: Building a Raspberry Pi Based WiFi APEmbedded Systems: Lecture 8: Lab 1: Building a Raspberry Pi Based WiFi AP
Embedded Systems: Lecture 8: Lab 1: Building a Raspberry Pi Based WiFi APAhmed El-Arabawy
 
Abstract Class & Abstract Method in Core Java
Abstract Class & Abstract Method in Core JavaAbstract Class & Abstract Method in Core Java
Abstract Class & Abstract Method in Core JavaMOHIT AGARWAL
 
transistor transistor logic
transistor transistor logictransistor transistor logic
transistor transistor logicmansi acharya
 
Vhdl identifiers,data types
Vhdl identifiers,data typesVhdl identifiers,data types
Vhdl identifiers,data typesMadhuriMulik1
 
What is Visual Studio Code?
What is Visual Studio Code?What is Visual Studio Code?
What is Visual Studio Code?Mindfire LLC
 
Looping statements in Java
Looping statements in JavaLooping statements in Java
Looping statements in JavaJin Castor
 

What's hot (20)

CSharp Presentation
CSharp PresentationCSharp Presentation
CSharp Presentation
 
Programmable logic devices
Programmable logic devicesProgrammable logic devices
Programmable logic devices
 
Kivy report
Kivy reportKivy report
Kivy report
 
Java applet
Java appletJava applet
Java applet
 
Qt programming-using-cpp
Qt programming-using-cppQt programming-using-cpp
Qt programming-using-cpp
 
Visual studio code
Visual studio codeVisual studio code
Visual studio code
 
Embedded Systems: Lecture 13: Introduction to GNU Toolchain (Build Tools)
Embedded Systems: Lecture 13: Introduction to GNU Toolchain (Build Tools)Embedded Systems: Lecture 13: Introduction to GNU Toolchain (Build Tools)
Embedded Systems: Lecture 13: Introduction to GNU Toolchain (Build Tools)
 
Runtime Symbol Resolution
Runtime Symbol ResolutionRuntime Symbol Resolution
Runtime Symbol Resolution
 
State of the Art OpenGL and Qt
State of the Art OpenGL and QtState of the Art OpenGL and Qt
State of the Art OpenGL and Qt
 
Null safety in dart and flutter , the whole story!
Null safety in dart and flutter , the whole story!Null safety in dart and flutter , the whole story!
Null safety in dart and flutter , the whole story!
 
2CPP14 - Abstraction
2CPP14 - Abstraction2CPP14 - Abstraction
2CPP14 - Abstraction
 
Embedded Systems: Lecture 8: Lab 1: Building a Raspberry Pi Based WiFi AP
Embedded Systems: Lecture 8: Lab 1: Building a Raspberry Pi Based WiFi APEmbedded Systems: Lecture 8: Lab 1: Building a Raspberry Pi Based WiFi AP
Embedded Systems: Lecture 8: Lab 1: Building a Raspberry Pi Based WiFi AP
 
Abstract Class & Abstract Method in Core Java
Abstract Class & Abstract Method in Core JavaAbstract Class & Abstract Method in Core Java
Abstract Class & Abstract Method in Core Java
 
Final keyword in java
Final keyword in javaFinal keyword in java
Final keyword in java
 
transistor transistor logic
transistor transistor logictransistor transistor logic
transistor transistor logic
 
Qt for beginners
Qt for beginnersQt for beginners
Qt for beginners
 
Vhdl identifiers,data types
Vhdl identifiers,data typesVhdl identifiers,data types
Vhdl identifiers,data types
 
What is Visual Studio Code?
What is Visual Studio Code?What is Visual Studio Code?
What is Visual Studio Code?
 
Looping statements in Java
Looping statements in JavaLooping statements in Java
Looping statements in Java
 
Introduction to kotlin
Introduction to kotlinIntroduction to kotlin
Introduction to kotlin
 

Similar to How to add an optimization for C# to RyuJIT

Egor Bogatov - .NET Core intrinsics and other micro-optimizations
Egor Bogatov - .NET Core intrinsics and other micro-optimizationsEgor Bogatov - .NET Core intrinsics and other micro-optimizations
Egor Bogatov - .NET Core intrinsics and other micro-optimizationsEgor Bogatov
 
PVS-Studio team experience: checking various open source projects, or mistake...
PVS-Studio team experience: checking various open source projects, or mistake...PVS-Studio team experience: checking various open source projects, or mistake...
PVS-Studio team experience: checking various open source projects, or mistake...Andrey Karpov
 
Halide tutorial 2019
Halide tutorial 2019Halide tutorial 2019
Halide tutorial 2019Champ Yen
 
Java Performance Puzzlers
Java Performance PuzzlersJava Performance Puzzlers
Java Performance PuzzlersDoug Hawkins
 
JVM Mechanics: When Does the JVM JIT & Deoptimize?
JVM Mechanics: When Does the JVM JIT & Deoptimize?JVM Mechanics: When Does the JVM JIT & Deoptimize?
JVM Mechanics: When Does the JVM JIT & Deoptimize?Doug Hawkins
 
C++ lectures all chapters in one slide.pptx
C++ lectures all chapters in one slide.pptxC++ lectures all chapters in one slide.pptx
C++ lectures all chapters in one slide.pptxssuser3cbb4c
 
ch04-conditional-execution.ppt
ch04-conditional-execution.pptch04-conditional-execution.ppt
ch04-conditional-execution.pptMahyuddin8
 
Pythran: Static compiler for high performance by Mehdi Amini PyData SV 2014
Pythran: Static compiler for high performance by Mehdi Amini PyData SV 2014Pythran: Static compiler for high performance by Mehdi Amini PyData SV 2014
Pythran: Static compiler for high performance by Mehdi Amini PyData SV 2014PyData
 
Whats new in_csharp4
Whats new in_csharp4Whats new in_csharp4
Whats new in_csharp4Abed Bukhari
 
Hypercritical C++ Code Review
Hypercritical C++ Code ReviewHypercritical C++ Code Review
Hypercritical C++ Code ReviewAndrey Karpov
 
Chainer-Compiler 動かしてみた
Chainer-Compiler 動かしてみたChainer-Compiler 動かしてみた
Chainer-Compiler 動かしてみたAkira Maruoka
 
شرح مقرر البرمجة 2 لغة جافا - الوحدة الثالثة
شرح مقرر البرمجة 2   لغة جافا - الوحدة الثالثةشرح مقرر البرمجة 2   لغة جافا - الوحدة الثالثة
شرح مقرر البرمجة 2 لغة جافا - الوحدة الثالثةجامعة القدس المفتوحة
 
Let’s talk about microbenchmarking
Let’s talk about microbenchmarkingLet’s talk about microbenchmarking
Let’s talk about microbenchmarkingAndrey Akinshin
 
"Quantum" performance effects
"Quantum" performance effects"Quantum" performance effects
"Quantum" performance effectsSergey Kuksenko
 
How Data Flow analysis works in a static code analyzer
How Data Flow analysis works in a static code analyzerHow Data Flow analysis works in a static code analyzer
How Data Flow analysis works in a static code analyzerAndrey Karpov
 
Introduction to NumPy for Machine Learning Programmers
Introduction to NumPy for Machine Learning ProgrammersIntroduction to NumPy for Machine Learning Programmers
Introduction to NumPy for Machine Learning ProgrammersKimikazu Kato
 
Operator overloading2
Operator overloading2Operator overloading2
Operator overloading2zindadili
 

Similar to How to add an optimization for C# to RyuJIT (20)

Egor Bogatov - .NET Core intrinsics and other micro-optimizations
Egor Bogatov - .NET Core intrinsics and other micro-optimizationsEgor Bogatov - .NET Core intrinsics and other micro-optimizations
Egor Bogatov - .NET Core intrinsics and other micro-optimizations
 
PVS-Studio team experience: checking various open source projects, or mistake...
PVS-Studio team experience: checking various open source projects, or mistake...PVS-Studio team experience: checking various open source projects, or mistake...
PVS-Studio team experience: checking various open source projects, or mistake...
 
Halide tutorial 2019
Halide tutorial 2019Halide tutorial 2019
Halide tutorial 2019
 
Java Performance Puzzlers
Java Performance PuzzlersJava Performance Puzzlers
Java Performance Puzzlers
 
JVM Mechanics: When Does the JVM JIT & Deoptimize?
JVM Mechanics: When Does the JVM JIT & Deoptimize?JVM Mechanics: When Does the JVM JIT & Deoptimize?
JVM Mechanics: When Does the JVM JIT & Deoptimize?
 
C++ lectures all chapters in one slide.pptx
C++ lectures all chapters in one slide.pptxC++ lectures all chapters in one slide.pptx
C++ lectures all chapters in one slide.pptx
 
White box-sol
White box-solWhite box-sol
White box-sol
 
ch04-conditional-execution.ppt
ch04-conditional-execution.pptch04-conditional-execution.ppt
ch04-conditional-execution.ppt
 
Pythran: Static compiler for high performance by Mehdi Amini PyData SV 2014
Pythran: Static compiler for high performance by Mehdi Amini PyData SV 2014Pythran: Static compiler for high performance by Mehdi Amini PyData SV 2014
Pythran: Static compiler for high performance by Mehdi Amini PyData SV 2014
 
Whats new in_csharp4
Whats new in_csharp4Whats new in_csharp4
Whats new in_csharp4
 
Exploiting vectorization with ISPC
Exploiting vectorization with ISPCExploiting vectorization with ISPC
Exploiting vectorization with ISPC
 
Hypercritical C++ Code Review
Hypercritical C++ Code ReviewHypercritical C++ Code Review
Hypercritical C++ Code Review
 
Chainer-Compiler 動かしてみた
Chainer-Compiler 動かしてみたChainer-Compiler 動かしてみた
Chainer-Compiler 動かしてみた
 
شرح مقرر البرمجة 2 لغة جافا - الوحدة الثالثة
شرح مقرر البرمجة 2   لغة جافا - الوحدة الثالثةشرح مقرر البرمجة 2   لغة جافا - الوحدة الثالثة
شرح مقرر البرمجة 2 لغة جافا - الوحدة الثالثة
 
Let’s talk about microbenchmarking
Let’s talk about microbenchmarkingLet’s talk about microbenchmarking
Let’s talk about microbenchmarking
 
"Quantum" performance effects
"Quantum" performance effects"Quantum" performance effects
"Quantum" performance effects
 
How Data Flow analysis works in a static code analyzer
How Data Flow analysis works in a static code analyzerHow Data Flow analysis works in a static code analyzer
How Data Flow analysis works in a static code analyzer
 
Introduction to NumPy for Machine Learning Programmers
Introduction to NumPy for Machine Learning ProgrammersIntroduction to NumPy for Machine Learning Programmers
Introduction to NumPy for Machine Learning Programmers
 
Operator overloading2
Operator overloading2Operator overloading2
Operator overloading2
 
10. Recursion
10. Recursion10. Recursion
10. Recursion
 

Recently uploaded

Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...MyIntelliSource, Inc.
 
HR Software Buyers Guide in 2024 - HRSoftware.com
HR Software Buyers Guide in 2024 - HRSoftware.comHR Software Buyers Guide in 2024 - HRSoftware.com
HR Software Buyers Guide in 2024 - HRSoftware.comFatema Valibhai
 
Unveiling the Tech Salsa of LAMs with Janus in Real-Time Applications
Unveiling the Tech Salsa of LAMs with Janus in Real-Time ApplicationsUnveiling the Tech Salsa of LAMs with Janus in Real-Time Applications
Unveiling the Tech Salsa of LAMs with Janus in Real-Time ApplicationsAlberto González Trastoy
 
Learn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdf
Learn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdfLearn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdf
Learn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdfkalichargn70th171
 
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...MyIntelliSource, Inc.
 
Diamond Application Development Crafting Solutions with Precision
Diamond Application Development Crafting Solutions with PrecisionDiamond Application Development Crafting Solutions with Precision
Diamond Application Development Crafting Solutions with PrecisionSolGuruz
 
(Genuine) Escort Service Lucknow | Starting ₹,5K To @25k with A/C 🧑🏽‍❤️‍🧑🏻 89...
(Genuine) Escort Service Lucknow | Starting ₹,5K To @25k with A/C 🧑🏽‍❤️‍🧑🏻 89...(Genuine) Escort Service Lucknow | Starting ₹,5K To @25k with A/C 🧑🏽‍❤️‍🧑🏻 89...
(Genuine) Escort Service Lucknow | Starting ₹,5K To @25k with A/C 🧑🏽‍❤️‍🧑🏻 89...gurkirankumar98700
 
The Ultimate Test Automation Guide_ Best Practices and Tips.pdf
The Ultimate Test Automation Guide_ Best Practices and Tips.pdfThe Ultimate Test Automation Guide_ Best Practices and Tips.pdf
The Ultimate Test Automation Guide_ Best Practices and Tips.pdfkalichargn70th171
 
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...Steffen Staab
 
A Secure and Reliable Document Management System is Essential.docx
A Secure and Reliable Document Management System is Essential.docxA Secure and Reliable Document Management System is Essential.docx
A Secure and Reliable Document Management System is Essential.docxComplianceQuest1
 
Professional Resume Template for Software Developers
Professional Resume Template for Software DevelopersProfessional Resume Template for Software Developers
Professional Resume Template for Software DevelopersVinodh Ram
 
How To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.jsHow To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.jsAndolasoft Inc
 
Clustering techniques data mining book ....
Clustering techniques data mining book ....Clustering techniques data mining book ....
Clustering techniques data mining book ....ShaimaaMohamedGalal
 
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...ICS
 
CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online ☂️
CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online  ☂️CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online  ☂️
CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online ☂️anilsa9823
 
Software Quality Assurance Interview Questions
Software Quality Assurance Interview QuestionsSoftware Quality Assurance Interview Questions
Software Quality Assurance Interview QuestionsArshad QA
 
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...panagenda
 

Recently uploaded (20)

Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
 
HR Software Buyers Guide in 2024 - HRSoftware.com
HR Software Buyers Guide in 2024 - HRSoftware.comHR Software Buyers Guide in 2024 - HRSoftware.com
HR Software Buyers Guide in 2024 - HRSoftware.com
 
Unveiling the Tech Salsa of LAMs with Janus in Real-Time Applications
Unveiling the Tech Salsa of LAMs with Janus in Real-Time ApplicationsUnveiling the Tech Salsa of LAMs with Janus in Real-Time Applications
Unveiling the Tech Salsa of LAMs with Janus in Real-Time Applications
 
Learn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdf
Learn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdfLearn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdf
Learn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdf
 
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
 
Diamond Application Development Crafting Solutions with Precision
Diamond Application Development Crafting Solutions with PrecisionDiamond Application Development Crafting Solutions with Precision
Diamond Application Development Crafting Solutions with Precision
 
(Genuine) Escort Service Lucknow | Starting ₹,5K To @25k with A/C 🧑🏽‍❤️‍🧑🏻 89...
(Genuine) Escort Service Lucknow | Starting ₹,5K To @25k with A/C 🧑🏽‍❤️‍🧑🏻 89...(Genuine) Escort Service Lucknow | Starting ₹,5K To @25k with A/C 🧑🏽‍❤️‍🧑🏻 89...
(Genuine) Escort Service Lucknow | Starting ₹,5K To @25k with A/C 🧑🏽‍❤️‍🧑🏻 89...
 
The Ultimate Test Automation Guide_ Best Practices and Tips.pdf
The Ultimate Test Automation Guide_ Best Practices and Tips.pdfThe Ultimate Test Automation Guide_ Best Practices and Tips.pdf
The Ultimate Test Automation Guide_ Best Practices and Tips.pdf
 
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...
 
A Secure and Reliable Document Management System is Essential.docx
A Secure and Reliable Document Management System is Essential.docxA Secure and Reliable Document Management System is Essential.docx
A Secure and Reliable Document Management System is Essential.docx
 
Professional Resume Template for Software Developers
Professional Resume Template for Software DevelopersProfessional Resume Template for Software Developers
Professional Resume Template for Software Developers
 
How To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.jsHow To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.js
 
Clustering techniques data mining book ....
Clustering techniques data mining book ....Clustering techniques data mining book ....
Clustering techniques data mining book ....
 
Vip Call Girls Noida ➡️ Delhi ➡️ 9999965857 No Advance 24HRS Live
Vip Call Girls Noida ➡️ Delhi ➡️ 9999965857 No Advance 24HRS LiveVip Call Girls Noida ➡️ Delhi ➡️ 9999965857 No Advance 24HRS Live
Vip Call Girls Noida ➡️ Delhi ➡️ 9999965857 No Advance 24HRS Live
 
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
 
CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online ☂️
CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online  ☂️CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online  ☂️
CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online ☂️
 
Software Quality Assurance Interview Questions
Software Quality Assurance Interview QuestionsSoftware Quality Assurance Interview Questions
Software Quality Assurance Interview Questions
 
CHEAP Call Girls in Pushp Vihar (-DELHI )🔝 9953056974🔝(=)/CALL GIRLS SERVICE
CHEAP Call Girls in Pushp Vihar (-DELHI )🔝 9953056974🔝(=)/CALL GIRLS SERVICECHEAP Call Girls in Pushp Vihar (-DELHI )🔝 9953056974🔝(=)/CALL GIRLS SERVICE
CHEAP Call Girls in Pushp Vihar (-DELHI )🔝 9953056974🔝(=)/CALL GIRLS SERVICE
 
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
 
Microsoft AI Transformation Partner Playbook.pdf
Microsoft AI Transformation Partner Playbook.pdfMicrosoft AI Transformation Partner Playbook.pdf
Microsoft AI Transformation Partner Playbook.pdf
 

How to add an optimization for C# to RyuJIT

  • 1. How to add an optimization for C# to JIT compiler @EgorBo Engineer at Microsoft
  • 2. RyuJIT From C# to machine code 2 C# IL IR ASM
  • 3. float y = x / 2; float y = x * 0.5f; Let’s start with a simple optimization 3 vdivss (Latency: 20, R.Throughput: 14) vmulss (Latency: 5, R.Throughput: 0.5) ^ for my MacBook (Haswell)
  • 4. X / C 4 double y = x / 2; float y = x / 2; double y = x / 10; double y = x / 8; double y = x / -0.5; float x = 48.665f; Console.WriteLine(x / 10f); // 4.8665 Console.WriteLine(x * 0.1f); // 4.8665004 = x * 0.5 = x * 0.5f = x * 0.1 = x * 0.125 = x * -2
  • 5. X / C – let me optimize it in Roslyn! 5 static float GetSpeed(float distance, float time) { return distance / time; } ... float speed = GetSpeed(distance, 2); Does Roslyn see “X/C” here? NO! It doesn’t inline methods
  • 6. Where to implement my custom optimization? 6 • Roslyn + No time constraints + It’s written in C# - easy to add optimizations, easy to debug and experiment - No cross-assembly optimizations - No CPU-dependent optimizations (IL is cross-platform) - Doesn’t know what the code will look like after inlining, CSE, loop optimizations, etc. - F# doesn’t use Roslyn • JIT + Inlining, CSE, Loop opts, etc phases create more opportunities for optimizations + Knows everything about target platform, CPU capabilities - Written in C++, difficult to experiment - Time constraints for optimizations (probably not that important with Tiering) • R2R (AOT) + No time constraints (some optimizations are really time consuming, e.g. full escape analysis) - No CPU-dependent optimizations - Will be most likely re-jitted anyway? • ILLink Custom Step + Cross-assembly IL optimizations + Written in C# + We can manually de-virtualize types/methods/calls (if we know what we are doing) - Still no inlining, CSE, etc..
  • 8. RyuJIT: where to morph my X/C? 8
  • 10. Dump IR via COMPlus_JitDump
  • 11. Back to X/C: Morph IR Tree static float Calculate(float distance, float v0) { return distance / 2 + v0; } ADD DIV LCL_VAR (v0) CNS_DBL (2.0) LCL_VAR (distance) ADD MUL LCL_VAR (v0) CNS_DBL (0.5) LCL_VAR (distance) Morph
  • 12. Implementing the opt in morph.cpp DIV op2op1
  • 13. Inspired by GT_ROL 13 /// <summary> /// Rotates the specified value left by the specified number of bits. /// </summary> public static uint RotateLeft(uint value, int offset) => (value << offset) | (value >> (32 - offset)); OR ROL / / / / LSH RSZ => x y / / x AND x AND / / y 31 ADD 31 / NEG 32 | y rol eax, cl
  • 14. 15 "Hello".Length -> 5 static void Append(string str) { if (str.Length <= 2) QuickAppend(str); else SlowAppend(str); } ... builder.Append("/>"); builder.QuickAppend("/>"); Inline, remove if (2 <= 2)
  • 15. 16 "Hello".Length => 5 ARR_LENGTH CNS_STR (ScpHnd, SconCPX) CNS_INT (5) VM case GT_ARR_LENGTH: { if (op1->OperIs(GT_CNS_STR)) { GenTreeStrCon* strCon = op1->AsStrCon(); int len = info.compCompHnd->getStringLength( strCon->gtScpHnd, strCon->gtSconCPX); return gtNewIconNode(len); } break; } JIT <-> VM Interface op1 Access VM’s data from JIT
  • 16. 17 bool Test1(int x) => x % 4 == 0; bool Test1(int x) => (x & 3) == 0; MOD EQ/NE CNS_INT (0) CNS_INT (4) anything op1 op2 op2op1 AND EQ/NE CNS_INT (0) CNS_INT (3) anything op1 op2 op2op1
  • 17. Roslyn and “!=“ operator (unexpected optimization) 18 public static bool Test1(int x) => x != 42; public static bool Test2(int x) => x != 0; IL_0000: ldarg.0 IL_0001: ldc.i4.42 IL_0002: ceq IL_0004: ldc.i4.0 IL_0005: ceq IL_0007: ret IL_0000: ldarg.0 IL_0001: ldc.i4.0 IL_0002: cgt.un IL_0004: ret return (uint)x > 0 return (x == 42) == false JIT: ok, it’s GT_NE JIT: what?.. ok, GT_GT
  • 18. 19 Math.Pow(x, 2) Math.Pow(x, 1) Math.Pow(x, 4) Math.Pow(x, -1) // can be added: Math.Pow(42, 3) Math.Pow(1, x) Math.Pow(2, x) Math.Pow(x, 0) Math.Pow(x, 0.5) | x * x | x | x * x * x * x | 1 / x | 74088 | 1 | exp2(x) | 1 | sqrt(x)
  • 20. Range check elimination 21 public static void Test(int[] a) { a[0] = 4; a[1] = 2; for (int i = 0; i < a.Length; i++) { a[i] = 0; } a[1] = 2; }
  • 21. Range check elimination 22 public static void Test(int[] a) { if (a.Length <= 0) throw new IndexOutOfRangeException(); a[0] = 4; if (a.Length <= 1) throw new IndexOutOfRangeException(); a[1] = 2; for (int i = 0; i < a.Length; i++) { if (a.Length <= i) throw new IndexOutOfRangeException(); a[i] = 0; } if (a.Length <= 1) throw new IndexOutOfRangeException(); a[1] = 2; }
  • 22. Range check elimination 23 public static void Test(int[] a) { if (a.Length <= 0) throw new IndexOutOfRangeException(); a[0] = 4; if (a.Length <= 1) throw new IndexOutOfRangeException(); a[1] = 2; for (int i = 0; i < a.Length; i++) { if (a.Length <= i) throw new IndexOutOfRangeException(); a[i] = 0; } if (a.Length <= 1) throw new IndexOutOfRangeException(); a[1] = 2; }
  • 23. Range check elimination 24 public static void Test(int[] a) { if (a.Length <= 1) throw new IndexOutOfRangeException(); a[1] = 2; if (a.Length <= 1) throw new IndexOutOfRangeException(); a[0] = 4; for (int i = 0; i < a.Length; i++) { if (a.Length <= i) throw new IndexOutOfRangeException(); a[i] = 0; } if (a.Length <= 1) throw new IndexOutOfRangeException(); a[1] = 2; }
  • 24. Range check elimination 25 public static void Test(int[] a) { if (a.Length <= 1) throw new IndexOutOfRangeException(); a[1] = 2; a[0] = 4; for (int i = 0; i < a.Length; i++) { a[i] = 0; } a[1] = 2; }
  • 25. rangecheck.cpp (simplified) 26 void RangeCheck::OptimizeRangeCheck(GenTreeBoundsChk* bndsChk) { // Get the range for this index. Range range = GetRange(...); // If upper or lower limit is unknown, then return. if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown()) { return; } // Is the range between the lower and upper bound values. if (BetweenBounds(range, 0, bndsChk->gtArrLen)) { m_pCompiler->optRemoveRangeCheck(treeParent, stmt); } return; }
  • 26. rangecheck.cpp (simplified) 27 void RangeCheck::OptimizeRangeCheck(GenTreeBoundsChk* bndsChk) { // Get the range for this index. Range range = GetRange(...); // If upper or lower limit is unknown, then return. if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown()) { return; } // Is the range between the lower and upper bound values. if (BetweenBounds(range, 0, bndsChk->gtArrLen)) { m_pCompiler->optRemoveRangeCheck(treeParent, stmt); } return; }
  • 27. rangecheck.cpp (simplified) 28 void RangeCheck::OptimizeRangeCheck(GenTreeBoundsChk* bndsChk) { // Get the range for this index. Range range = GetRange(...); // If upper or lower limit is unknown, then return. if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown()) { return; } // Is the range between the lower and upper bound values. if (BetweenBounds(range, 0, bndsChk->gtArrLen)) { m_pCompiler->optRemoveRangeCheck(treeParent, stmt); } return; }
  • 28. Byte array 29 private static readonly byte[] _data = new byte[256] { 1, 2, 3, … }; public static byte GetByte(int i) { return _data[i]; }
  • 29. Byte array: Roslyn hack (new feature) 30 private static ReadOnlySpan<byte> _data => new byte[256] { 1, 2, 3, … }; public static byte GetByte(int i) { return _data[i]; } 256
  • 30. Byte array: byte index (my PR) 31 private static ReadOnlySpan<byte> _data => new byte[256] { 1, 2, 3, … }; public static byte GetByte(int i) { return _data[(byte)i]; } Byte indexer will never go out of bounds!
  • 31. rangecheck.cpp (simplified) 32 void RangeCheck::OptimizeRangeCheck(GenTreeBoundsChk* bndsChk) { // Get the range for this index. Range range = GetRange(...); // If upper or lower limit is unknown, then return. if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown()) { return; } // Is the range between the lower and upper bound values. if (BetweenBounds(range, 0, bndsChk->gtArrLen)) { m_pCompiler->optRemoveRangeCheck(treeParent, stmt); } return; } [Byte.MinValue ... Byte.MaxValue] ArrLen = 256
  • 33. Loop Invariant Code Hoisting 34 public static bool Test(int[] a, int c) { for (int i = 0; i < a.Length; i++) { if (a[i] == c + 44) return false; } return true; }
  • 34. Loop Invariant Code Hoisting 35 public static bool Test(int[] a, int c) { int tmp = c + 44; for (int i = 0; i < a.Length; i++) { if (a[i] == tmp) return false; } return true; }
  • 35. NYI: Loop-unrolling 36 public static int Test(int[] a) { int sum = 0; for (int i = 0; i < a.Length; i++) { sum += a[i]; } return sum; }
  • 36. NYI: Loop-unrolling 37 public static int Test(int[] a) { int sum = 0; for (int i = 0; i < a.Length - 3; i += 4) { sum += a[i]; sum += a[i+1]; sum += a[i+2]; sum += a[i+3]; } return sum; }
  • 37. NYI: Loop-unswitch 38 public static int Test(int[] a, bool condition) { int agr = 0; for (int i = 0; i < a.Length; i++) { if (condition) agr += a[i]; else agr *= a[i]; } return agr; }
  • 38. NYI: Loop-unswitch 39 public static int Test (int[] a, bool condition) { int agr = 0; if (condition) for (int i = 0; i < a.Length; i++) agr += a[i]; else for (int i = 0; i < a.Length; i++) agr *= a[i]; return agr; }
  • 39. NYI: Loop-deletion 40 public static void Test() { for (int i = 0; i < 10; i++) { } } ; Method Program:Test() G_M44187_IG01: G_M44187_IG02: xor eax, eax G_M44187_IG03: inc eax cmp eax, 10 jl SHORT G_M44187_IG03 G_M44187_IG04: ret ; Total bytes of code: 10
  • 40. NYI: Loop-deletion 41 public static void Test() { } ; Method Program:DeadLoop() ret ; Total bytes of code: 1
  • 41. NYI: InductiveRangeCheckElimination 42 public static void Zero1000Elements(int[] array) { for (int i = 0; i < 1000; i++) array[i] = 0; // bound checks will be inserted here }
  • 42. NYI: InductiveRangeCheckElimination 43 public static void Zero1000Elements(int[] array) { int limit = Math.Min(array.Length, 1000); for (int i = 0; i < limit; i++) array[i] = 0; // bound checks are not needed here! for (int i = limit; i < 1000; i++) array[i] = 0; // bound checks are needed here // so at least we could "zero" first `limit` elements without bound checks } NOTE: this LLVM optimization pass is not enabled by default in `opt –O2`. Contributed by Azul developers (LLVM for JVM)
  • 43. NYI: InductiveRangeCheckElimination 44 public static void Zero1000Elements(int[] array) { int limit = Math.Min(array.Length, 1000); for (int i = 0; i < limit - 3; i += 4) { array[i] = 0; array[i+1] = 0; array[i+2] = 0; array[i+3] = 0; } for (int i = limit; i < 1000; i++) array[i] = 0; // bound checks are needed here // so at least we could "zero" first `limit` elements without bound checks } Now we can even unroll the first loop!
  • 44. NYI: InductiveRangeCheckElimination 45 public static void Zero1000Elements(int[] array) { int limit = Math.Min(array.Length, 1000); memset(array, 0, limit); for (int i = limit; i < 1000; i++) array[i] = 0; // bound checks are needed here // so at least we could "zero" first `limit` elements without bound checks } Or just replace with memset call