JFFS3 & performance

Artem B. Bityuckiy dedekind at infradead.org
Fri Jan 7 09:31:29 EST 2005


I'm sorry, I attached an old version. Here is the latest one, which I used.

--
Best Regards,
Artem B. Bityuckiy,
St.-Petersburg, Russia.
-------------- next part --------------
/*
 * 	Copyright (C)
 * 		Artem B. Bityuckiy, dedekind at infradead.org
 * 		Joern Engel, joern at wohnheim.fh-wedel.de
 * 
 *      This program is free software; you can redistribute it and/or modify
 *      it under the terms of the GNU General Public License as published by
 *      the Free Software Foundation; either version 2 of the License, or
 *      (at your option) any later version.
 *
 *      This program is distributed in the hope that it will be useful,
 *      but WITHOUT ANY WARRANTY; without even the implied warranty of
 *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *      GNU General Public License for more details.
 *
 *      You should have received a copy of the GNU General Public License
 *      along with this program; if not, write to the Free Software
 *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *      Version: 1.5
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/crc32.h>
#include <linux/crc32c.h>
#include <linux/crc-ccitt.h>
#include <asm/timex.h>

#if defined CONFIG_ARM || defined CONFIG_ARM_THUMB

/*
 * In case of ARM we do not have any cycles counter, so use the system
 * timer's offset() hook instead.
 *
 * Every function-like macro below parenthesizes its arguments and its
 * whole expansion, so it can be dropped into any expression without
 * operator-precedence surprises.
 */
#include <asm/mach/time.h>
extern struct sys_timer *system_timer;

#define GET_CYCLES_SUPPORTED	0

/* Time-stamp function */
#define ARCH_TIMESTAMP()	(system_timer->offset())
/* Time-stamp type */
#define arch_timestamp_t	unsigned long
/* The prefix for test's output */
#define ARCH_TEST_PREFIX	"[crctest] (ARM timers) "
/* Cast a time-stamp to unsigned long long */
#define ARCH_CAST_ULL(ts)	((unsigned long long)(ts))
/* The difference between two time-stamps (unsigned long long) */
#define ARCH_DELTA(ts1, ts2)	ARCH_CAST_ULL((ts2) - (ts1))
/* The number of iterations of each CRC calculation */
#define ARCH_ITERATIONS		10

#else

#define GET_CYCLES_SUPPORTED	1

#endif

/*
 * Most architectures have some kind of high resolution time-stamp
 * counter and define the get_cycles() macro to access it (asm/timex.h).
 * But some do not. For those we need to do something special. In case
 * of ARM we use timers.
 *
 * Whichever branch is taken, the rest of the file only uses the generic
 * names defined here: TIMESTAMP(), timestamp_t, TEST_PREFIX, CAST_ULL(),
 * DELTA() and ITERATIONS.
 */
#if GET_CYCLES_SUPPORTED
/* Time-stamp function */
#define TIMESTAMP()	get_cycles()
/* Time-stamp type */
#define timestamp_t	cycles_t
/* The prefix for test's output */
#define TEST_PREFIX	"[crctest] (get_cycles) "
/* The macro to cast time-stamp type to unsigned long long */
#define CAST_ULL(ts)	((unsigned long long)(ts))
/* The difference between two time-stamps (unsigned long long) */
#define DELTA(ts1, ts2)	CAST_ULL((ts2) - (ts1))
/* The number of iterations of CRC calculation */
#define ITERATIONS	1 /* get_cycles is too accurate to iterate */
#else
#define TIMESTAMP()	ARCH_TIMESTAMP()
#define timestamp_t	arch_timestamp_t
#define TEST_PREFIX	ARCH_TEST_PREFIX
#define CAST_ULL(ts)	ARCH_CAST_ULL(ts)
#define DELTA(ts1, ts2)	ARCH_DELTA(ts1, ts2)
#define ITERATIONS	ARCH_ITERATIONS
#endif

/*
 * Tests are performed with interrupts and preemption disabled.
 *
 * The saved interrupt state is kept in the single file-scope variable
 * irq_flags, so lock()/unlock() pairs must not be nested: a second
 * lock() would overwrite the state saved by the first one.
 *
 * unlock() releases in the reverse order of lock(): interrupts are
 * restored first, then preemption is re-enabled.
 */
static unsigned long irq_flags;
#define lock()							\
	do {							\
		preempt_disable();				\
		local_irq_save(irq_flags);			\
	} while (0)
#define unlock()						\
	do {							\
		local_irq_restore(irq_flags);			\
		preempt_enable();				\
	} while (0)


/*
 * In order not to lose too many interrupts we relax the system from
 * time to time during testing. This may be important if we perform
 * tests on a machine that must do other work as well.
 *
 * RELAX() drops the test lock, calls cond_resched() and then takes the
 * lock again, so it may only be used while the lock is held.
 */
#define RELAX()								\
	do {								\
		unlock();						\
		cond_resched();						\
		lock();							\
	} while(0)
		
	
/*
 * The test results output macro.
 *
 * name  - test name; must be a string literal, because it is pasted
 *         between TEST_PREFIX and the format string by literal
 *         concatenation
 * bytes - size of the tested chunk, printed with %d
 * ts1   - time-stamp taken before the measured code
 * ts2   - time-stamp taken after the measured code
 *
 * Prints the delta between the two time-stamps followed by both raw
 * values, all converted to unsigned long long.
 */
#define PRINT_RESULTS(name, bytes, ts1, ts2)				\
	do {								\
		printk(KERN_NOTICE TEST_PREFIX name ", %d bytes: "	\
			"delta %llu, ts1 %llu, ts2 %llu\n", bytes,	\
			DELTA(ts1, ts2), CAST_ULL(ts1), CAST_ULL(ts2));	\
	} while(0)

/*
 * The size (in bytes) of the vmalloc'ed array used to prune any
 * test-related data from the CPU data cache.
 *
 * The expansion is parenthesized so that expressions such as
 * "x / TMPMEM_SIZE" evaluate as intended.
 */
#define TMPMEM_SIZE (1*1024*1024)

/* The number of memory chunks to test */
#define MEM_CHUNKS 5

/*
 * Forward declarations of the file-local routines that are defined
 * after init_crctest() further down in this file.
 */

/* Adler-32 checksum over buf[0..len-1], continuing from adler. */
static unsigned long
adler32(unsigned long adler, const unsigned char *buf, size_t len);

/* Variant of adler32() that walks the buffer from its end. */
static uint32_t
adler32r(uint32_t adler, const char *buf, size_t len);

/* Joern Engel's checksum, walking the buffer forward. */
static uint32_t
engel32(uint32_t engel, const void *_s, size_t len);

/* Joern Engel's checksum, walking the buffer from its end. */
static uint32_t
engel32r(uint32_t engel, const void *_s, size_t len);

/* Rewrites the whole tmp_mem array to push test data out of the cache. */
static void
trash_cache(void);

/* 
 * The sizes (in bytes) of the memory chunks for which CRCs should be
 * tested. Entry i is the size of the i-th buffer that init_crctest()
 * allocates with kmalloc().
 */
static int memsizes[MEM_CHUNKS] = {32, PAGE_SIZE, 32*1024, 64*1024, 128*1024};

/* 
 * The buffer which we use to trash the CPU data cache. It is allocated
 * (TMPMEM_SIZE bytes, vmalloc) and freed by init_crctest() and is
 * walked by trash_cache().
 */
static char *tmp_mem;


/* 
 * We perform actual testing in the module initialization function.
 *
 * The function allocates the cache-trashing buffer and one buffer per
 * entry of memsizes[], then, with preemption and interrupts disabled:
 *   1. times plain read-modify-write passes over each buffer in the
 *      four forward/backward combinations (cold cache);
 *   2. times adler32, adler32r, engel32, engel32r, crc_ccitt, crc32 and
 *      crc32c over each buffer (warm cache, ITERATIONS runs each).
 * Results are reported through PRINT_RESULTS(). All buffers are freed
 * before returning.
 *
 * Returns 0 on success or -ENOMEM if any allocation fails.
 *
 * NOTE(review): the "crc" results are never read; confirm that the
 * compiler in use does not optimize away the calls to the file-local
 * checksum routines.
 */
static int __init
init_crctest(void)
{
	register int i, j;
	/*
	 * Must be zeroed before the first "goto exit": the cleanup code
	 * walks this array on every exit path, including the one taken
	 * when the very first allocation fails.
	 */
	char *mem[MEM_CHUNKS] = { NULL };
	timestamp_t ts1, ts2;
	int ret = 0;

	if ((tmp_mem = vmalloc(TMPMEM_SIZE)) == NULL) {
		printk(KERN_ERR TEST_PREFIX "can't allocate %d bytes\n",
					TMPMEM_SIZE);
		ret = -ENOMEM;
		goto exit;
	}

	/* Allocate memory */
	for (i = 0; i < MEM_CHUNKS; i++) {
		if ((mem[i] = kmalloc(memsizes[i], GFP_KERNEL)) == NULL) {
			printk(KERN_ERR TEST_PREFIX "can't allocate %d bytes\n",
					memsizes[i]);
			ret = -ENOMEM;
			goto exit;
		}
	}

	/* 
	 * We do not want to be preempted during the test, nor do we
	 * want interrupts to affect our results.
	 */
	lock();

	/* 
	 * Now we are going to measure the difference between passing
	 * over the arrays twice in the same direction and passing over
	 * them once forward and once backward.
	 */
	for (i = 0; i < MEM_CHUNKS; i++) {

		/* Trash the CPU data cache */
		trash_cache();
		ts1 = TIMESTAMP();
		for (j = 0; j < memsizes[i]; j++)
			mem[i][j] = mem[i][j] + 1;
		for (j = 0; j < memsizes[i]; j++)
			mem[i][j] = mem[i][j] + 1;
		ts2 = TIMESTAMP();
		PRINT_RESULTS("Data pass both forward", memsizes[i], ts1, ts2);

		trash_cache();
		ts1 = TIMESTAMP();
		for (j = memsizes[i] - 1; j >= 0; j--)
			mem[i][j] = mem[i][j] + 1;
		for (j = memsizes[i] - 1; j >= 0; j--)
			mem[i][j] = mem[i][j] + 1;
		ts2 = TIMESTAMP();
		PRINT_RESULTS("Data pass both backward", memsizes[i], ts1, ts2);

		trash_cache();
		ts1 = TIMESTAMP();
		for (j = memsizes[i] - 1; j >= 0; j--)
			mem[i][j] = mem[i][j] + 1;
		for (j = 0; j < memsizes[i]; j++)
			mem[i][j] = mem[i][j] + 1;
		ts2 = TIMESTAMP();
		PRINT_RESULTS("Data pass backward/forward", memsizes[i], ts1, ts2);

		trash_cache();
		ts1 = TIMESTAMP();
		for (j = 0; j < memsizes[i]; j++)
			mem[i][j] = mem[i][j] + 1;
		for (j = memsizes[i] - 1; j >= 0; j--)
			mem[i][j] = mem[i][j] + 1;
		ts2 = TIMESTAMP();
		PRINT_RESULTS("Data pass forward/backward", memsizes[i], ts1, ts2);
	}

	/* 
	 * Test adler32 CRC.
	 */
	for (i = 0; i < MEM_CHUNKS; i++) {
		unsigned long crc;

		/* Do one fake pass to exclude CPU cache influence */
		crc = adler32(0xFFFFFFFF, mem[i], memsizes[i]);

		ts1 = TIMESTAMP();
		for (j = 0; j < ITERATIONS; j++)
			crc = adler32(0xFFFFFFFF, mem[i], memsizes[i]);
		ts2 = TIMESTAMP();
		PRINT_RESULTS("adler32", memsizes[i], ts1, ts2);

		RELAX();
	}

	/* 
	 * Test adler32r CRC.
	 */
	for (i = 0; i < MEM_CHUNKS; i++) {
		unsigned long crc;

		/* Warm-up pass, not timed */
		crc = adler32r(0xFFFFFFFF, mem[i], memsizes[i]);

		ts1 = TIMESTAMP();
		for (j = 0; j < ITERATIONS; j++)
			crc = adler32r(0xFFFFFFFF, mem[i], memsizes[i]);
		ts2 = TIMESTAMP();
		PRINT_RESULTS("adler32r", memsizes[i], ts1, ts2);

		RELAX();
	}

	/* 
	 * Test engel32 CRC.
	 */
	for (i = 0; i < MEM_CHUNKS; i++) {
		uint32_t crc;

		/* Warm-up pass, not timed */
		crc = engel32(0xFFFFFFFF, mem[i], memsizes[i]);

		ts1 = TIMESTAMP();
		for (j = 0; j < ITERATIONS; j++)
			crc = engel32(0xFFFFFFFF, mem[i], memsizes[i]);
		ts2 = TIMESTAMP();
		PRINT_RESULTS("engel32", memsizes[i], ts1, ts2);

		RELAX();
	}

	/* 
	 * Test engel32r CRC.
	 */
	for (i = 0; i < MEM_CHUNKS; i++) {
		uint32_t crc;

		/* Warm-up pass, not timed */
		crc = engel32r(0xFFFFFFFF, mem[i], memsizes[i]);

		ts1 = TIMESTAMP();
		for (j = 0; j < ITERATIONS; j++)
			crc = engel32r(0xFFFFFFFF, mem[i], memsizes[i]);
		ts2 = TIMESTAMP();
		PRINT_RESULTS("engel32r", memsizes[i], ts1, ts2);

		RELAX();
	}

	/* 
	 * Test 16 bit CRC CCITT.
	 */
	for (i = 0; i < MEM_CHUNKS; i++) {
		u16 crc;

		/* Warm-up pass, not timed */
		crc = crc_ccitt(0xFFFF, mem[i], memsizes[i]);

		ts1 = TIMESTAMP();
		for (j = 0; j < ITERATIONS; j++)
			crc = crc_ccitt(0xFFFF, mem[i], memsizes[i]);
		ts2 = TIMESTAMP();

		PRINT_RESULTS("16-bit CRC CCITT", memsizes[i], ts1, ts2);

		RELAX();
	}

	/* 
	 * Test crc32 CRC.
	 */
	for (i = 0; i < MEM_CHUNKS; i++) {
		u32 crc;

		/* Warm-up pass, not timed */
		crc = crc32(0xFFFFFFFF, mem[i], memsizes[i]);

		ts1 = TIMESTAMP();
		for (j = 0; j < ITERATIONS; j++)
			crc = crc32(0xFFFFFFFF, mem[i], memsizes[i]);
		ts2 = TIMESTAMP();
		PRINT_RESULTS("CRC32", memsizes[i], ts1, ts2);

		RELAX();
	}

	/* Test crc32c */
	for (i = 0; i < MEM_CHUNKS; i++) {
		u32 crc;

		/* Warm-up pass, not timed */
		crc = crc32c(0xFFFFFFFF, mem[i], memsizes[i]);

		ts1 = TIMESTAMP();
		for (j = 0; j < ITERATIONS; j++)
			crc = crc32c(0xFFFFFFFF, mem[i], memsizes[i]);
		ts2 = TIMESTAMP();
		PRINT_RESULTS("CRC32c", memsizes[i], ts1, ts2);

		RELAX();
	}

	unlock();

exit:
	/* vfree() and kfree() both accept NULL, so no guards are needed. */
	vfree(tmp_mem);
	tmp_mem = NULL;

	for (i = 0; i < MEM_CHUNKS; i++)
		kfree(mem[i]);

	return ret;
}

module_init(init_crctest);

/*
 * Module unload hook. There is nothing to undo here: init_crctest()
 * releases every buffer it allocates before it returns.
 */
static void __exit
cleanup_crctest(void)
{
}

module_exit(cleanup_crctest);

/*
 * Push our test data out of the CPU cache by reading and rewriting
 * every byte of the big tmp_mem array (TMPMEM_SIZE bytes).
 */
static void
trash_cache(void)
{
	char *cur = tmp_mem;
	char *const end = tmp_mem + TMPMEM_SIZE;

	while (cur < end) {
		*cur = *cur + 1;
		cur++;
	}
}

/* ----------------------------------------------------------------------- */

/*
 * Adler-32 building blocks.
 * Was borrowed from include/linux/zutil.h
 * Copyright (C) 1995-1998 Jean-loup Gailly.
 *
 * NMAX is the largest number of bytes summed between two modulo
 * reductions; BASE is the modulus applied to both running sums.
 * The DOn() macros are unrolled accumulation steps; they expect the
 * variables s1 and s2 to be in scope at the point of use.
 */
#define NMAX 5552
#define BASE 65521L
#define DO1(buf,i)  {s1 += buf[i]; s2 += s1;}
#define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
#define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
#define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
#define DO16(buf)   DO8(buf,0); DO8(buf,8);

/*
 * Compute the Adler-32 checksum of buf[0..len-1].
 *
 * adler - starting value; its low 16 bits seed the first sum and the
 *         next 16 bits seed the second sum
 * buf   - data to checksum; if NULL the function returns 1
 * len   - number of bytes in buf
 *
 * Returns the second sum in bits 16 and up and the first sum in the
 * low 16 bits.
 */
static unsigned long
adler32(unsigned long adler, const unsigned char *buf, size_t len)
{
	unsigned long s1, s2;

	if (buf == NULL)
		return 1L;

	s1 = adler & 0xffff;
	s2 = (adler >> 16) & 0xffff;

	while (len > 0) {
		/* Take at most NMAX bytes before reducing the sums. */
		size_t chunk = len < NMAX ? len : NMAX;
		const unsigned char *stop = buf + chunk;

		len -= chunk;

		/* Unrolled part: 16 bytes per step. */
		for (; stop - buf >= 16; buf += 16) {
			DO16(buf);
		}

		/* Whatever is left of the chunk, one byte per step. */
		for (; buf < stop; buf++) {
			s1 += *buf;
			s2 += s1;
		}

		s1 %= BASE;
		s2 %= BASE;
	}

	return (s2 << 16) | s1;
}

/*
 * Reverse version of adler32 (provided by Jorn Engel).
 *
 * The buffer is consumed starting from its end: whole 16-byte groups
 * are taken from the back (bytes inside one group are still summed in
 * ascending order by DO16), and the remaining bytes of each chunk are
 * taken one at a time, walking backward.
 *
 * adler - starting value; its low 16 bits seed the first sum and the
 *         next 16 bits seed the second sum
 * buf   - data to checksum; if NULL the function returns 1
 * len   - number of bytes in buf
 *
 * Returns the second sum in the high 16 bits and the first sum in the
 * low 16 bits.
 *
 * The data is read through an unsigned char pointer. Reading it through
 * plain char would sign-extend bytes >= 0x80 on architectures where
 * char is signed, which makes the result architecture-dependent and
 * invalidates the overflow bound NMAX is chosen for.
 */
static uint32_t
adler32r(uint32_t adler, const char *buf, size_t len)
{
	const unsigned char *p;
	unsigned long s1 = adler & 0xffff;
	unsigned long s2 = (adler >> 16) & 0xffff;
	int k;

	if (!buf)
		return 1;

	/* Start one past the last byte and walk toward the beginning. */
	p = (const unsigned char *)buf + len;
	while (len > 0) {
		k = len < NMAX ? len : NMAX;
		len -= k;
		while (k >= 16) {
			p -= 16;
			DO16(p);
			k -= 16;
		}
		if (k != 0)
			do {
				s1 += *--p;
				s2 += s1;
			} while (--k);
		s1 %= BASE;
		s2 %= BASE;
	}
	return (s2 << 16) | s1;
}

/*
 * Jorn Engel's algorithms.
 *
 * engel32 - checksum the buffer walking forward.
 *
 * engel - starting value for both running sums
 * _s    - data to checksum
 * len   - number of bytes in _s
 *
 * Two 32-bit accumulators are kept: "sum" adds up the bytes and "prod"
 * adds up the successive values of "sum". At the end the bits of "sum"
 * are reversed (bit 0 <-> bit 31, ...) and XORed into "prod", which is
 * returned.
 *
 * Bytes are read as unsigned char so that the result does not depend
 * on whether plain char is signed on the target architecture.
 */
static uint32_t
engel32(uint32_t engel, const void *_s, size_t len)
{
	const unsigned char *s = _s;
	uint32_t sum = engel, prod = engel;

	/* Main loop, unrolled four bytes per step. */
	for (; len >= 4; len -= 4, s += 4) {
		sum += s[0];
		prod += sum;
		sum += s[1];
		prod += sum;
		sum += s[2];
		prod += sum;
		sum += s[3];
		prod += sum;
	}
	/* Up to three trailing bytes. */
	for (; len; len--, s++) {
		sum += *s;
		prod += sum;
	}
	/* Reverse the bit order of sum: halves, bytes, nibbles, pairs, bits. */
	sum = (sum & 0x0000ffff) << 16 ^ (sum & 0xffff0000) >> 16;
	sum = (sum & 0x00ff00ff) << 8 ^ (sum & 0xff00ff00) >> 8;
	sum = (sum & 0x0f0f0f0f) << 4 ^ (sum & 0xf0f0f0f0) >> 4;
	sum = (sum & 0x33333333) << 2 ^ (sum & 0xcccccccc) >> 2;
	sum = (sum & 0x55555555) << 1 ^ (sum & 0xaaaaaaaa) >> 1;
	prod ^= sum;
	return prod;
}

/*
 * engel32r - Jorn Engel's checksum, walking the buffer backward.
 *
 * engel - starting value for both running sums
 * _s    - data to checksum
 * len   - number of bytes in _s
 *
 * Processes the bytes from _s[len-1] down to _s[0]; the result equals
 * what the forward algorithm yields on the byte-reversed buffer.
 *
 * The base pointer is deliberately NOT advanced: "len" alone is the
 * cursor, and the next byte to consume is always s[len - 1]. Advancing
 * the pointer while also shrinking len would keep s + len fixed, i.e.
 * re-read the same trailing bytes and finally read one past the end.
 *
 * Bytes are read as unsigned char so that the result does not depend
 * on whether plain char is signed on the target architecture.
 */
static uint32_t
engel32r(uint32_t engel, const void *_s, size_t len)
{
	const unsigned char *s = _s;
	uint32_t sum = engel, prod = engel;

	/* Main loop, unrolled four bytes per step, from the end. */
	for (; len >= 4; len -= 4) {
		sum += s[len - 1];
		prod += sum;
		sum += s[len - 2];
		prod += sum;
		sum += s[len - 3];
		prod += sum;
		sum += s[len - 4];
		prod += sum;
	}
	/* Up to three leading bytes that are left. */
	for (; len; len--) {
		sum += s[len - 1];
		prod += sum;
	}
	/* Reverse the bit order of sum: halves, bytes, nibbles, pairs, bits. */
	sum = (sum & 0x0000ffff) << 16 ^ (sum & 0xffff0000) >> 16;
	sum = (sum & 0x00ff00ff) << 8 ^ (sum & 0xff00ff00) >> 8;
	sum = (sum & 0x0f0f0f0f) << 4 ^ (sum & 0xf0f0f0f0) >> 4;
	sum = (sum & 0x33333333) << 2 ^ (sum & 0xcccccccc) >> 2;
	sum = (sum & 0x55555555) << 1 ^ (sum & 0xaaaaaaaa) >> 1;
	prod ^= sum;
	return prod;
}

/*MODULE_VERSION("1.5");
MODULE_LICENSE ("GPL");
MODULE_AUTHOR ("Artem B. Bityuckiy");
MODULE_DESCRIPTION ("The CRC test");*/



More information about the linux-mtd mailing list