JFFS3 & performance

Artem B. Bityuckiy dedekind at infradead.org
Thu Dec 23 12:02:31 EST 2004


Created new version of test. Review please. What else should we add? 

Fixed bug: memcmp -> memset.
Added buffers passes in different directions.
Added few more features.

P.S. Didn't run - only compiled. Right now crashed PC and do some 
recovery.

/*
 *      This program is free software; you can redistribute it and/or 
modify
 *      it under the terms of the GNU General Public License as published 
by
 *      the Free Software Foundation; either version 2 of the License, or
 *      (at your option) any later version.
 *
 *      This program is distributed in the hope that it will be useful,
 *      but WITHOUT ANY WARRANTY; without even the implied warranty of
 *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *      GNU General Public License for more details.
 *
 *      You should have received a copy of the GNU General Public License
 *      along with this program; if not, write to the Free Software
 *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *      Author: Artem B. Bityuckiy, dedekind at infradead.org
 *      Version: 1.3
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/crc32.h>
#include <linux/crc32c.h>
#include <linux/crc-ccitt.h>
#include <asm/timex.h>

/* The number of memory cunks to test */
#define MEM_CHUNKS 4

/* The test iterations number */
#define ITERATIONS 1

/*
 * The size of vmalloc'ed array used to prune any test-related data
 * from the CPU data cache.
 */
#define TMPMEM_SIZE 1*1024*1024

#define TEST_PREFIX "[crctst] "

/*
 * Most architectures have some kind of hight resolution time-stamp
 * counter and define the get_cycles() macro to access it.
 */
#define TIMESTAMP get_cycles()


/*
 * In order to not lost much interupts we relax the system from
 * time to time during testing
 */
#define RELAX()                                                 \
        do {                                                    \
                spin_unlock_irqrestore(&lock, flags);           \
                yield();                                        \
                spin_lock_irqsave(&lock, flags);                \
        } while(0)

#define PRINT_RESULTS(name, bytes, ts1, ts2)                    \
        printk(KERN_NOTICE TEST_PREFIX name ", %d bytes: "      \
                " delta %llu, ts1 %llu, ts2 %llu", bytes,       \
                (unsigned long long)(ts2 - ts1),                \
                (unsigned long long)ts1,                        \
                (unsigned long long)ts2)                        \


static unsigned long
adler32(unsigned long adler, const unsigned char *buf, size_t len);

static uint32_t
adler32r(uint32_t adler, const char *buf, size_t len);

static uint32_t
engel32(uint32_t engel, const void *_s, size_t len);

static uint32_t
engel32r(uint32_t engel, const void *_s, size_t len);

static void
trash_cache(void);

/*
 * The sizes of memory chunks for which CRCs should be tested.
 */
static int memsizes[MEM_CHUNKS] = {32, PAGE_SIZE, 32*1024, 64*1024};

static char *tmp_mem;


/*
 * We perform actual testing in the module initialization function.
 */
static int __init
init_crctest(void)
{
        register int i, j;
        char *mem[MEM_CHUNKS];
        unsigned long flags;
        spinlock_t lock = SPIN_LOCK_UNLOCKED;
        int ret = 0;
        cycles_t ts1, ts2;

        if ((tmp_mem = vmalloc(TMPMEM_SIZE)) == NULL) {
                printk(KERN_ERR TEST_PREFIX "can't allocate %d bytes\n",
                                        TMPMEM_SIZE);
                ret = -ENOMEM;
                goto exit;
        }

        memset(&mem[0], '\0', MEM_CHUNKS * sizeof(char *));

        /* Allocate memory */
        for (i = 0; i < MEM_CHUNKS; i++) {
                if ((mem[i] = kmalloc(memsizes[i], GFP_KERNEL)) == NULL) {
                        printk(KERN_ERR TEST_PREFIX "can't allocate %d 
bytes\n",
                                        memsizes[i]);
                        ret = -ENOMEM;
                        goto exit;
                }
        }

        /*
         * We do not want to be preempted during the test as well as do
         * not want interrupts affect our results. Both of these are
         * prevented by spin_lock_irqsave().
         */
        spin_lock_irqsave(&lock, flags);

        /*
         * Now we gonna measure the difference between passing arrays
         * two times forward/on time backward and one time forward.
         */
        for (i = 0; i < MEM_CHUNKS; i++) {

                /* Trash the CPU data chache */
                trash_cache();

                ts1 = TIMESTAMP;
                for (i = 0; i < TMPMEM_SIZE; i++)
                        tmp_mem[i] = tmp_mem[i] + 1;
                for (i = 0; i < TMPMEM_SIZE; i++)
                        tmp_mem[i] = tmp_mem[i] + 1;
                ts2 = TIMESTAMP;
                PRINT_RESULTS("Data pass both forward", memsizes[i], ts1, 
ts2);

                trash_cache();

                ts1 = TIMESTAMP;
                for (i = TMPMEM_SIZE; i >= 0; i--)
                        tmp_mem[i] = tmp_mem[i] + 1;
                for (i = 0; i < TMPMEM_SIZE; i++)
                        tmp_mem[i] = tmp_mem[i] + 1;
                ts2 = TIMESTAMP;
                PRINT_RESULTS("Data pass backward/forward", memsizes[i], 
ts1, ts2);
        }

        RELAX();

        /* Test adler32 */
        for (i = 0; i < MEM_CHUNKS; i++) {
                unsigned long crc;

                crc = adler32(0xFFFFFFFF, mem[i], memsizes[i]);
                ts1 = TIMESTAMP;
                for (j = 0; j < ITERATIONS; j++)
                        crc = adler32(0xFFFFFFFF, mem[i], memsizes[i]);
                ts2 = TIMESTAMP;
                PRINT_RESULTS("adler32", memsizes[i], ts1, ts2);
        }

        RELAX();

        /* Test adler32r */
        for (i = 0; i < MEM_CHUNKS; i++) {
                unsigned long crc;

                crc = adler32r(0xFFFFFFFF, mem[i], memsizes[i]);
                ts1 = TIMESTAMP;
                for (j = 0; j < ITERATIONS; j++)
                        crc = adler32r(0xFFFFFFFF, mem[i], memsizes[i]);
                ts2 = TIMESTAMP;
                PRINT_RESULTS("adler32r", memsizes[i], ts1, ts2);
        }

        RELAX();

        /* Test engel32 */
        for (i = 0; i < MEM_CHUNKS; i++) {
                uint32_t crc;

                crc = engel32(0xFFFFFFFF, mem[i], memsizes[i]);
                ts1 = TIMESTAMP;
                for (j = 0; j < ITERATIONS; j++)
                        crc = engel32(0xFFFFFFFF, mem[i], memsizes[i]);
                ts2 = TIMESTAMP;
                PRINT_RESULTS("engel32", memsizes[i], ts1, ts2);
        }

        RELAX();

        /* Test engel32r */
        for (i = 0; i < MEM_CHUNKS; i++) {
                uint32_t crc;

                crc = engel32r(0xFFFFFFFF, mem[i], memsizes[i]);
                ts1 = TIMESTAMP;
                for (j = 0; j < ITERATIONS; j++)
                        crc = engel32r(0xFFFFFFFF, mem[i], memsizes[i]);
                ts2 = TIMESTAMP;
                PRINT_RESULTS("engel32r", memsizes[i], ts1, ts2);
        }

        RELAX();

        /* Test 16 bit CRC CCITT */
        for (i = 0; i < MEM_CHUNKS; i++) {
                u16 crc;

                /* Do one fake pass to exclude CPU cache influence */
                crc = crc_ccitt(0xFFFF, mem[i], memsizes[i]);

                ts1 = TIMESTAMP;
                for (j = 0; j < ITERATIONS; j++)
                        crc = crc_ccitt(0xFFFF, mem[i], memsizes[i]);
                ts2 = TIMESTAMP;
                PRINT_RESULTS("16-bit CRC CCITT", memsizes[i], ts1, ts2);
        }

        RELAX();

        /* Test crc32 */
        for (i = 0; i < MEM_CHUNKS; i++) {
                u32 crc;

                crc = crc32(0xFFFFFFFF, mem[i], memsizes[i]);
                ts1 = TIMESTAMP;
                for (j = 0; j < ITERATIONS; j++)
                        crc = crc32(0xFFFFFFFF, mem[i], memsizes[i]);
                ts2 = TIMESTAMP;
                PRINT_RESULTS("CRC32", memsizes[i], ts1, ts2);
        }

        RELAX();

        /* Test crc32c */
        for (i = 0; i < MEM_CHUNKS; i++) {
                u32 crc;

                crc = crc32c(0xFFFFFFFF, mem[i], memsizes[i]);
                ts1 = TIMESTAMP;
                for (j = 0; j < ITERATIONS; j++)
                        crc = crc32c(0xFFFFFFFF, mem[i], memsizes[i]);
                ts2 = TIMESTAMP;
                PRINT_RESULTS("CRC32c", memsizes[i], ts1, ts2);
        }

        spin_unlock_irqrestore(&lock, flags);

exit:
        if (tmp_mem != NULL)
                vfree(tmp_mem);

        for (i = 0; i < MEM_CHUNKS && mem[i] != NULL; i++)
                kfree(mem[i]);

        return ret;
}

module_init(init_crctest);

static void __exit
cleanup_crctest(void)
{
        return;
}

module_exit(cleanup_crctest);

/*
 * In order to prune our data from the CPU cache, we scan big data
 * array.
 */
static void
trash_cache(void) {
        register int i;
        for (i = 1; i < TMPMEM_SIZE; i++)
                tmp_mem[i-1] = tmp_mem[i] + 1;
}

/* ----------------------------------------------------------------------- 
*/

/*
 * Was borrowed from include/linux/zutil.h
 */
#define NMAX 5552
#define BASE 65521L
#define DO1(buf,i)  {s1 += buf[i]; s2 += s1;}
#define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
#define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
#define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
#define DO16(buf)   DO8(buf,0); DO8(buf,8);

static unsigned long
adler32(unsigned long adler, const unsigned char *buf, size_t len)
{
    unsigned long s1 = adler & 0xffff;
    unsigned long s2 = (adler >> 16) & 0xffff;
    int k;

    if (buf == NULL) return 1L;

    while (len > 0) {
        k = len < NMAX ? len : NMAX;
        len -= k;
        while (k >= 16) {
            DO16(buf);
            buf += 16;
            k -= 16;
        }
        if (k != 0) do {
            s1 += *buf++;
            s2 += s1;
        } while (--k);
        s1 %= BASE;
        s2 %= BASE;
    }
    return (s2 << 16) | s1;
}

/*
 * Reverse version of adler32 (provided by Jorn Engel).
 */
static uint32_t
adler32r(uint32_t adler, const char *buf, size_t len)
{
        unsigned long s1 = adler & 0xffff;
        unsigned long s2 = (adler >> 16) & 0xffff;
        int k;

        if (!buf)
                return 1L;

        buf += len;
        while (len > 0) {
                k = len < NMAX ? len : NMAX;
                len -= k;
                while (k >= 16) {
                        buf -= 16;
                        DO16(buf);
                        k -= 16;
                }
                if (k != 0)
                        do {
                                s1 += *--buf;
                                s2 += s1;
                        } while (--k);
                s1 %= BASE;
                s2 %= BASE;
        }
        return (s2 << 16) | s1;
}

/*
 * Jorn Engel's algorithms.
 */
static uint32_t
engel32(uint32_t engel, const void *_s, size_t len)
{
        const char *s = _s;
        uint32_t sum=engel, prod=engel;
        for (; len>=4; len-=4, s+=4) {
                sum += s[0];
                prod += sum;
                sum += s[1];
                prod += sum;
                sum += s[2];
                prod += sum;
                sum += s[3];
                prod += sum;
        }
        for (; len; len--, s++) {
                sum += *s;
                prod += sum;
        }
        sum = (sum&0x0000ffff)<<16^ (sum&0xffff0000)>>16;
        sum = (sum&0x00ff00ff)<<8 ^ (sum&0xff00ff00)>>8;
        sum = (sum&0x0f0f0f0f)<<4 ^ (sum&0xf0f0f0f0)>>4;
        sum = (sum&0x33333333)<<2 ^ (sum&0xcccccccc)>>2;
        sum = (sum&0x55555555)<<1 ^ (sum&0xaaaaaaaa)>>1;
        prod ^= sum;
        return prod;
}

static uint32_t
engel32r(uint32_t engel, const void *_s, size_t len)
{
        const char *s = _s;
        uint32_t sum=engel, prod=engel;
        for (; len>=4; len-=4, s+=4) {
                sum += s[len-1];
                prod += sum;
                sum += s[len-2];
                prod += sum;
                sum += s[len-3];
                prod += sum;
                sum += s[len-4];
                prod += sum;
        }
        for (; len; len--, s++) {
                sum += s[len];
                prod += sum;
        }
        sum = (sum&0x0000ffff)<<16^ (sum&0xffff0000)>>16;
        sum = (sum&0x00ff00ff)<<8 ^ (sum&0xff00ff00)>>8;
        sum = (sum&0x0f0f0f0f)<<4 ^ (sum&0xf0f0f0f0)>>4;
        sum = (sum&0x33333333)<<2 ^ (sum&0xcccccccc)>>2;
        sum = (sum&0x55555555)<<1 ^ (sum&0xaaaaaaaa)>>1;
        prod ^= sum;
        return prod;
}

MODULE_LICENSE ("GPL");
MODULE_AUTHOR ("Artem B. Bityuckiy");
MODULE_DESCRIPTION ("The CRC test");


--
Best Regards,
Artem B. Bityuckiy,
St.-Petersburg, Russia.




More information about the linux-mtd mailing list