[Jitblit] Progress (finally)

Daniel Amelang daniel.amelang at gmail.com
Tue Apr 17 00:13:29 PDT 2007


Hey everyone,

Sorry for the lack of progress lately. Our newborn got pretty sick
about 2 weeks ago and had to stay in the hospital for a few days,
which threw us off quite a bit. Oh, and then classes started up again.
Luckily I'm only taking one class (my last class, yay!), so it
shouldn't take too much of my time.

So now I'm back to hacking on this, and have some minor progress to
report. I took Carl's jitblit version of cairo and fixed it to
correctly implement OVER. So the code was now at least correct, albeit
still really slow. Then I rewrote the jolt code so that the inner and
outer image iteration loops are moved into the jitted code, which
eliminated the previous overhead of the function call for every pixel.
So now there's only one function call per composite operation. If
anyone wants to follow along at home, I'm attaching the patch you
should apply to Carl's cairo-jitblit git repository. I'll be setting
up my own cairo-jitblit git repository at freedesktop.org Real Soon
(TM).

I think the next step should be cleaning up cairo-jitblit so that
libjolt is statically linked into the cairo shared library. This will
make it easier to start using cairo-jitblit from actual applications
like cairo-perf or what-not. In the near future, I'd like to make it
pretty easy for anyone to run a jitblit version of cairo. On the cairo
end, we can make it as easy as passing in a configure option. Getting
a working libjolt is a little more complicated, but maybe I can send
Ian some patches to make generating a single libjolt.a even easier,
with no gc.a or libid.o dependencies (they'd be statically compiled
in). I envision that our new cairo configure macro could just look for
the presence of libjolt.a and link it in when cairo-jitblit is
enabled (--enable-jitblit or something). Thoughts?

Once that is done, I think it'll be time to make a general
announcement to the cairo community, since my results will be easily
reproducible, and there will be at least one (OK, _just_ one) full
example of a jolt-compiled render operation.

Dan
-------------- next part --------------
From a1bd6bee69cf8e5cfdb1f3987607a1aae767b35c Mon Sep 17 00:00:00 2001
From: Dan Amelang <dan at amelang.net>
Date: Mon, 16 Apr 2007 23:25:13 -0700
Subject: [PATCH] Move pixel processing loop into the jitted code

This means that instead of calling the jitted code once for each pixel,
we now call it once per composite operation. Obviously, this makes the
operation much faster.
---
 pixman/src/pixman-jitblit.c |   99 +++++++++++++++++++------------------------
 1 files changed, 44 insertions(+), 55 deletions(-)

diff --git a/pixman/src/pixman-jitblit.c b/pixman/src/pixman-jitblit.c
index 6d6d865..c0e9f17 100644
--- a/pixman/src/pixman-jitblit.c
+++ b/pixman/src/pixman-jitblit.c
@@ -22,6 +22,7 @@
  * SOFTWARE.
  *
  * Author: Carl Worth <cworth at cworth.org>
+ *         Dan Amelang <dan at amelang.net>
  */
 
 #include "pixman-jitblit.h"
@@ -31,9 +32,10 @@
 
 #include "jolt.h"
 
-typedef unsigned int pixel_t;
-typedef pixel_t (*pixop_t   )(pixel_t, pixel_t);
-typedef pixop_t (*compiler_t)(const char *);
+typedef void (*compositeop_t) (uint32_t *, uint32_t *, /* src_line,   dest_line */
+                               uint32_t,   uint32_t,   /* src_stride, dest_stride */
+                               uint16_t,   uint16_t);  /* width,      height */
+typedef compositeop_t (*compositeop_compiler_t) (const char *);
 
 void
 _pixman_composite_jitblit (pixman_operator_t	 op,
@@ -49,47 +51,49 @@ _pixman_composite_jitblit (pixman_operator_t	 op,
 			   uint16_t		 width,
 			   uint16_t		 height)
 {
-    uint32_t *src_line, *src_pixel;
-    uint32_t *dest_line, *dest_pixel;
+    uint32_t *src_line;
+    uint32_t *dest_line;
     int src_stride, dest_stride;
-    int w, h;
     int argc = 1;
     char *argv[] = {"cairo", 0};
     char *envp[] = {0};
 
-    compiler_t compile= (compiler_t)jolt_init(&argc, &argv, &envp);
-    pixop_t pixop= compile("\n\
-	(lambda (srcPix dstPix)\n\
-	  (let ((srcA (& 0xff (>> srcPix 24)))\n\
-		(srcR (& 0xff (>> srcPix 16)))\n\
-		(srcG (& 0xff (>> srcPix  8)))\n\
-		(srcB (& 0xff     srcPix    ))\n\
-		(dstA (& 0xff (>> dstPix 24)))\n\
-		(dstR (& 0xff (>> dstPix 16)))\n\
-		(dstG (& 0xff (>> dstPix  8)))\n\
-		(dstB (& 0xff     dstPix    ))\n\
-		(outA (+ dstA (>> (* srcA (- srcA dstA)) 8)))\n\
-		(outR (+ dstR (>> (* srcA (- srcR dstR)) 8)))\n\
-		(outG (+ dstG (>> (* srcA (- srcG dstG)) 8)))\n\
-		(outB (+ dstB (>> (* srcA (- srcB dstB)) 8))))\n\
-	    (| (<<         outA  24)\n\
-	       (<< (& 0xff outR) 16)\n\
-	       (<< (& 0xff outG)  8)\n\
-	           (& 0xff outB)   )))");
-
-    /* XXX: Eventually, we'd like to be compiling a little GLSL chunk like this:
-    const char glsl_over[] =
-	"uniform sampler2D src_image;\n"
-	"uniform sampler2D dst_image;\n"
-	"\n"
-	"void main()\n"
-	"{\n"
-	"    vec4 src_pixel = texture2D(src_image, gl_TexCoord[0].st);\n"
-	"    vec4 dst_pixel = texture2D(dst_image, gl_TexCoord[0].st);\n"
-	"\n"
-	"    gl_FragColor = dst_pixel + src_pixel.aaaa * (src_pixel - dst_pixel);\n"
-	"}";
-    */
+    compositeop_compiler_t compile=
+        (compositeop_compiler_t)jolt_init(&argc, &argv, &envp);
+    compositeop_t compositeop= compile("\n\
+(lambda (src dst src-stride dst-stride width height)\n\
+  (while (!= height 0)\n\
+    (let ((w width)\n\
+          (s src)\n\
+          (d dst))\n\
+      (while (!= w 0)\n\
+        (let ((s-pixel (int@ s))\n\
+              (d-pixel (int@ d))\n\
+              (s-a (& 0xff (>> s-pixel 24)))\n\
+              (s-r (& 0xff (>> s-pixel 16)))\n\
+              (s-g (& 0xff (>> s-pixel  8)))\n\
+              (s-b (& 0xff     s-pixel    ))\n\
+              (s-a-rev          (- 255 s-a))\n\
+              (d-a (& 0xff (>> d-pixel 24)))\n\
+              (d-r (& 0xff (>> d-pixel 16)))\n\
+              (d-g (& 0xff (>> d-pixel  8)))\n\
+              (d-b (& 0xff     d-pixel    ))\n\
+              (o-a (+ s-a (>> (* s-a-rev d-a) 8)))\n\
+              (o-r (+ s-r (>> (* s-a-rev d-r) 8)))\n\
+              (o-g (+ s-g (>> (* s-a-rev d-g) 8)))\n\
+              (o-b (+ s-b (>> (* s-a-rev d-b) 8))))\n\
+             (set (int@ d)\n\
+               (| (<< o-a 24)\n\
+                  (<< o-r 16)\n\
+                  (<< o-g  8)\n\
+                      o-b    )))\n\
+        (set s (+ 4 s))\n\
+        (set w (- w 1))\n\
+        (set d (+ 4 d))))\n\
+    (set height (- height 1))\n\
+    (set src (+ src (* 4 src-stride)))\n\
+    (set dst (+ dst (* 4 dst-stride)))))\n\
+");
 
     /* So far, this function only supports a small subset of what
      * pixman_composite can express. Of course, we'd like to extend it
@@ -103,23 +107,8 @@ _pixman_composite_jitblit (pixman_operator_t	 op,
     assert (mask == NULL);
     assert (dest && dest->format_code == PICT_a8r8g8b8);
 
-    for (h = 0; h < height; h++)
-	for (w = 0; w < width; w++)
-
-
     fbComposeGetStart (dest, dest_x, dest_y, uint32_t, dest_stride, dest_line, 1);
     fbComposeGetStart (src, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
-    for (h = 0; h < height; h++) {
-	dest_pixel = dest_line;;
-	dest_line += dest_stride;
-	src_pixel = src_line;
-	src_line += src_stride;
-
-	for (w = 0; w < width; w++) {
-	    *dest_pixel = pixop (*src_pixel, *dest_pixel);
-	    src_pixel++;
-	    dest_pixel++;
-	}
-    }
+    compositeop (src_line, dest_line, src_stride, dest_stride, width, height);
 }
-- 
1.4.4.2


More information about the JitBlit mailing list