JFFS3 & performance
Artem B. Bityuckiy
dedekind at infradead.org
Fri Jan 7 09:31:29 EST 2005
I'm sorry, I attached an old version. Here is the latest one, which I used.
--
Best Regards,
Artem B. Bityuckiy,
St.-Petersburg, Russia.
-------------- next part --------------
/*
* Copyright (C)
* Artem B. Bityuckiy, dedekind at infradead.org
* Joern Engel, joern at wohnheim.fh-wedel.de
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Version: 1.5
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/crc32.h>
#include <linux/crc32c.h>
#include <linux/crc-ccitt.h>
#include <asm/timex.h>
#if defined CONFIG_ARM || defined CONFIG_ARM_THUMB
/*
* In case of ARM we do not have any cycles counter, so use timers
* instead.
*
* This branch provides the ARCH_* building blocks that the generic
* TIMESTAMP()/DELTA()/... macros are mapped onto whenever
* GET_CYCLES_SUPPORTED is 0.
*/
#include <asm/mach/time.h>
extern struct sys_timer *system_timer;
/* Tell the generic code below that get_cycles() must not be used */
#define GET_CYCLES_SUPPORTED 0
/* Time-stamp function: asks the system timer for its current offset */
#define ARCH_TIMESTAMP() system_timer->offset()
/* Time-stamp type */
#define arch_timestamp_t unsigned long
/* The prefix for test's output */
#define ARCH_TEST_PREFIX "[crctest] (ARM timers) "
/* The macro to cast time-stamp type to unsigned long long */
#define ARCH_CAST_ULL(ts) (unsigned long long)(ts)
/* The difference between two time-stamps (unsigned long long) */
#define ARCH_DELTA(ts1, ts2) ARCH_CAST_ULL(ts2 - ts1)
/*
* The number of iterations of CRC calculation.
* NOTE(review): presumably more than one because the timer is coarser
* than a cycle counter - confirm.
*/
#define ARCH_ITERATIONS 10
#else
/* Every other architecture is expected to provide get_cycles() */
#define GET_CYCLES_SUPPORTED 1
#endif
/*
 * Most architectures have some kind of high-resolution time-stamp
 * counter and define the get_cycles() macro to access it (asm/timex.h).
 * But some do not. For those we need to do something special. In case
 * of ARM we use timers.
 *
 * Whatever the source, the rest of the file only uses the generic names
 * defined here: TIMESTAMP(), timestamp_t, TEST_PREFIX, CAST_ULL(),
 * DELTA() and ITERATIONS.
 */
#if GET_CYCLES_SUPPORTED
/* Time-stamp function */
#define TIMESTAMP() get_cycles()
/* Time-stamp type */
#define timestamp_t cycles_t
/* The prefix for test's output (same "[crctest]" tag as the ARM variant) */
#define TEST_PREFIX "[crctest] (get_cycles) "
/* The macro to cast time-stamp type to unsigned long long */
#define CAST_ULL(ts) ((unsigned long long)(ts))
/* The difference between two time-stamps (unsigned long long) */
#define DELTA(ts1, ts2) CAST_ULL((ts2) - (ts1))
/* The number of iterations of CRC calculation */
#define ITERATIONS 1 /* get_cycles is too accurate to iterate */
#else
/* No cycle counter: forward everything to the architecture-specific macros */
#define TIMESTAMP() ARCH_TIMESTAMP()
#define timestamp_t arch_timestamp_t
#define TEST_PREFIX ARCH_TEST_PREFIX
#define CAST_ULL(ts) ARCH_CAST_ULL(ts)
#define DELTA(ts1, ts2) ARCH_DELTA(ts1, ts2)
#define ITERATIONS ARCH_ITERATIONS
#endif
/*
 * Tests are performed with interrupts and preemption disabled.
 *
 * lock() disables preemption and then saves/disables local interrupts;
 * unlock() undoes both in the reverse order (interrupts first, then
 * preemption), mirroring what spin_unlock_irqrestore() does.
 *
 * The saved interrupt state lives in the single file-scope variable
 * irq_flags, so lock()/unlock() pairs must not be nested.
 */
static unsigned long irq_flags;
#define lock() \
	do { \
		preempt_disable(); \
		local_irq_save(irq_flags); \
	} while (0)
#define unlock() \
	do { \
		local_irq_restore(irq_flags); \
		preempt_enable(); \
	} while (0)
/*
* In order not to lose too many interrupts we relax the system from
* time to time during testing: interrupts and preemption are re-enabled
* with unlock(), cond_resched() is called, and then lock() disables both
* again. This may be important if we perform the tests on a machine
* that must also do other work.
*/
#define RELAX() \
do { \
unlock(); \
cond_resched(); \
lock(); \
} while(0)
/*
* The test results output macro.
*
* name  - name of the test; it is concatenated into the format string,
*         so it has to be a string literal
* bytes - size of the tested memory chunk, printed with %d
* ts1   - time-stamp taken before the measured code
* ts2   - time-stamp taken after the measured code
*
* Prints DELTA(ts1, ts2) followed by both raw time-stamps via printk()
* at KERN_NOTICE level, prefixed with TEST_PREFIX.
*/
#define PRINT_RESULTS(name, bytes, ts1, ts2) \
do { \
printk(KERN_NOTICE TEST_PREFIX name ", %d bytes: " \
"delta %llu, ts1 %llu, ts2 %llu\n", bytes, \
DELTA(ts1, ts2), CAST_ULL(ts1), CAST_ULL(ts2)); \
} while(0)
/*
 * The size (in bytes) of the vmalloc'ed array used to prune any
 * test-related data from the CPU data cache.
 *
 * The expression is parenthesized so that the macro stays a single
 * value when used inside a larger expression (e.g. "x % TMPMEM_SIZE").
 */
#define TMPMEM_SIZE (1*1024*1024)
/* The number of memory chunks to test */
#define MEM_CHUNKS 5
/*
* Forward declarations of the helpers defined further down in this file,
* so that init_crctest() can call them.
*/
/* Adler-32 checksum, scanning the buffer from its start */
static unsigned long
adler32(unsigned long adler, const unsigned char *buf, size_t len);
/* Adler-32 variant that scans the buffer starting from its end */
static uint32_t
adler32r(uint32_t adler, const char *buf, size_t len);
/* Joern Engel's checksum, scanning the buffer from its start */
static uint32_t
engel32(uint32_t engel, const void *_s, size_t len);
/* Joern Engel's checksum, indexing the buffer from its end */
static uint32_t
engel32r(uint32_t engel, const void *_s, size_t len);
/* Walks tmp_mem to push test data out of the CPU data cache */
static void
trash_cache(void);
/*
* The sizes (in bytes) of memory chunks for which CRCs should be tested.
* init_crctest() allocates one buffer per entry with kmalloc().
*/
static int memsizes[MEM_CHUNKS] = {32, PAGE_SIZE, 32*1024, 64*1024, 128*1024};
/*
* The buffer which we use to trash the CPU data cache. It is vmalloc'ed
* (TMPMEM_SIZE bytes) and freed again by init_crctest(), and walked by
* trash_cache().
*/
static char *tmp_mem;
/*
 * We perform actual testing in the module initialization function.
 *
 * The function allocates the cache-trashing buffer and one test buffer
 * per memsizes[] entry, runs the memory-pass measurements and then every
 * checksum benchmark with interrupts and preemption disabled, prints the
 * results with PRINT_RESULTS() and finally releases all memory again.
 *
 * Returns 0 when all tests were run, or -ENOMEM if one of the
 * allocations failed (in which case nothing is measured).
 */
static int __init
init_crctest(void)
{
	register int i, j;
	/*
	 * Must be zeroed before the first "goto exit": the cleanup loop
	 * stops at the first NULL entry and would otherwise kfree()
	 * uninitialized stack garbage when vmalloc() fails.
	 */
	char *mem[MEM_CHUNKS] = { NULL };
	timestamp_t ts1, ts2;
	int ret = 0;

	if ((tmp_mem = vmalloc(TMPMEM_SIZE)) == NULL) {
		printk(KERN_ERR TEST_PREFIX "can't allocate %d bytes\n",
		       TMPMEM_SIZE);
		ret = -ENOMEM;
		goto exit;
	}

	/* Allocate memory */
	for (i = 0; i < MEM_CHUNKS; i++) {
		if ((mem[i] = kmalloc(memsizes[i], GFP_KERNEL)) == NULL) {
			printk(KERN_ERR TEST_PREFIX "can't allocate %d bytes\n",
			       memsizes[i]);
			ret = -ENOMEM;
			goto exit;
		}
	}

	/*
	 * We do not want to be preempted during the test, nor do we
	 * want interrupts to affect our results.
	 */
	lock();

	/*
	 * Now we are going to measure the difference between walking
	 * each array twice in the same direction and walking it once
	 * in each direction.
	 */
	for (i = 0; i < MEM_CHUNKS; i++) {
		/* Trash the CPU data cache */
		trash_cache();
		ts1 = TIMESTAMP();
		for (j = 0; j < memsizes[i]; j++)
			mem[i][j] = mem[i][j] + 1;
		for (j = 0; j < memsizes[i]; j++)
			mem[i][j] = mem[i][j] + 1;
		ts2 = TIMESTAMP();
		PRINT_RESULTS("Data pass both forward", memsizes[i], ts1, ts2);

		trash_cache();
		ts1 = TIMESTAMP();
		for (j = memsizes[i] - 1; j >= 0; j--)
			mem[i][j] = mem[i][j] + 1;
		for (j = memsizes[i] - 1; j >= 0; j--)
			mem[i][j] = mem[i][j] + 1;
		ts2 = TIMESTAMP();
		PRINT_RESULTS("Data pass both backward", memsizes[i], ts1, ts2);

		trash_cache();
		ts1 = TIMESTAMP();
		for (j = memsizes[i] - 1; j >= 0; j--)
			mem[i][j] = mem[i][j] + 1;
		for (j = 0; j < memsizes[i]; j++)
			mem[i][j] = mem[i][j] + 1;
		ts2 = TIMESTAMP();
		PRINT_RESULTS("Data pass backward/forward", memsizes[i], ts1, ts2);

		trash_cache();
		ts1 = TIMESTAMP();
		for (j = 0; j < memsizes[i]; j++)
			mem[i][j] = mem[i][j] + 1;
		for (j = memsizes[i] - 1; j >= 0; j--)
			mem[i][j] = mem[i][j] + 1;
		ts2 = TIMESTAMP();
		PRINT_RESULTS("Data pass forward/backward", memsizes[i], ts1, ts2);
	}

	/*
	 * Every checksum test below follows the same pattern: one
	 * untimed warm-up call, then ITERATIONS timed calls, then the
	 * result line and a RELAX() so pending interrupts get served.
	 */

	/*
	 * Test adler32 CRC.
	 */
	for (i = 0; i < MEM_CHUNKS; i++) {
		unsigned long crc;

		/* Do one fake pass to exclude CPU cache influence */
		crc = adler32(0xFFFFFFFF, mem[i], memsizes[i]);
		ts1 = TIMESTAMP();
		for (j = 0; j < ITERATIONS; j++)
			crc = adler32(0xFFFFFFFF, mem[i], memsizes[i]);
		ts2 = TIMESTAMP();
		PRINT_RESULTS("adler32", memsizes[i], ts1, ts2);
		RELAX();
	}

	/*
	 * Test adler32r CRC.
	 */
	for (i = 0; i < MEM_CHUNKS; i++) {
		unsigned long crc;

		crc = adler32r(0xFFFFFFFF, mem[i], memsizes[i]);
		ts1 = TIMESTAMP();
		for (j = 0; j < ITERATIONS; j++)
			crc = adler32r(0xFFFFFFFF, mem[i], memsizes[i]);
		ts2 = TIMESTAMP();
		PRINT_RESULTS("adler32r", memsizes[i], ts1, ts2);
		RELAX();
	}

	/*
	 * Test engel32 CRC.
	 */
	for (i = 0; i < MEM_CHUNKS; i++) {
		uint32_t crc;

		crc = engel32(0xFFFFFFFF, mem[i], memsizes[i]);
		ts1 = TIMESTAMP();
		for (j = 0; j < ITERATIONS; j++)
			crc = engel32(0xFFFFFFFF, mem[i], memsizes[i]);
		ts2 = TIMESTAMP();
		PRINT_RESULTS("engel32", memsizes[i], ts1, ts2);
		RELAX();
	}

	/*
	 * Test engel32r CRC.
	 */
	for (i = 0; i < MEM_CHUNKS; i++) {
		uint32_t crc;

		crc = engel32r(0xFFFFFFFF, mem[i], memsizes[i]);
		ts1 = TIMESTAMP();
		for (j = 0; j < ITERATIONS; j++)
			crc = engel32r(0xFFFFFFFF, mem[i], memsizes[i]);
		ts2 = TIMESTAMP();
		PRINT_RESULTS("engel32r", memsizes[i], ts1, ts2);
		RELAX();
	}

	/*
	 * Test 16 bit CRC CCITT.
	 */
	for (i = 0; i < MEM_CHUNKS; i++) {
		u16 crc;

		crc = crc_ccitt(0xFFFF, mem[i], memsizes[i]);
		ts1 = TIMESTAMP();
		for (j = 0; j < ITERATIONS; j++)
			crc = crc_ccitt(0xFFFF, mem[i], memsizes[i]);
		ts2 = TIMESTAMP();
		PRINT_RESULTS("16-bit CRC CCITT", memsizes[i], ts1, ts2);
		RELAX();
	}

	/*
	 * Test crc32 CRC.
	 */
	for (i = 0; i < MEM_CHUNKS; i++) {
		u32 crc;

		crc = crc32(0xFFFFFFFF, mem[i], memsizes[i]);
		ts1 = TIMESTAMP();
		for (j = 0; j < ITERATIONS; j++)
			crc = crc32(0xFFFFFFFF, mem[i], memsizes[i]);
		ts2 = TIMESTAMP();
		PRINT_RESULTS("CRC32", memsizes[i], ts1, ts2);
		RELAX();
	}

	/* Test crc32c */
	for (i = 0; i < MEM_CHUNKS; i++) {
		u32 crc;

		crc = crc32c(0xFFFFFFFF, mem[i], memsizes[i]);
		ts1 = TIMESTAMP();
		for (j = 0; j < ITERATIONS; j++)
			crc = crc32c(0xFFFFFFFF, mem[i], memsizes[i]);
		ts2 = TIMESTAMP();
		PRINT_RESULTS("CRC32c", memsizes[i], ts1, ts2);
		RELAX();
	}

	unlock();

exit:
	/* Common cleanup for both the success and the failure paths */
	if (tmp_mem != NULL) {
		vfree(tmp_mem);
		/* Do not leave a dangling pointer in the file-scope variable */
		tmp_mem = NULL;
	}
	/* Buffers are allocated in order, so the first NULL ends the list */
	for (i = 0; i < MEM_CHUNKS && mem[i] != NULL; i++)
		kfree(mem[i]);
	return ret;
}
module_init(init_crctest);
/*
 * Module exit hook. There is nothing to undo here: the function body
 * is intentionally empty.
 */
static void __exit
cleanup_crctest(void)
{
}
module_exit(cleanup_crctest);
/*
 * Prune our test data from the CPU cache by touching (reading and
 * rewriting) every byte of the big tmp_mem array.
 */
static void
trash_cache(void)
{
	char *cur = tmp_mem;
	char *const end = tmp_mem + TMPMEM_SIZE;

	while (cur < end) {
		*cur += 1;
		cur++;
	}
}
/* ----------------------------------------------------------------------- */
/*
 * Was borrowed from include/linux/zutil.h
 * Copyright (C) 1995-1998 Jean-loup Gailly.
 *
 * NMAX bounds how many bytes are summed between two modulo reductions,
 * BASE is the modulus applied to both running sums, and the DOn()
 * macros are the unrolled inner step operating on the local variables
 * s1 and s2 of the calling function.
 */
#define NMAX 5552
#define BASE 65521L
#define DO1(buf,i) {s1 += buf[i]; s2 += s1;}
#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
#define DO16(buf) DO8(buf,0); DO8(buf,8);
/*
 * Adler-32 checksum over buf[0..len-1], scanned front to back.
 *
 * adler - starting value: low 16 bits seed s1, next 16 bits seed s2
 * buf   - data to checksum; if NULL the function returns 1
 * len   - number of bytes in buf
 *
 * Returns (s2 << 16) | s1 after processing all bytes.
 */
static unsigned long
adler32(unsigned long adler, const unsigned char *buf, size_t len)
{
	unsigned long s1, s2;
	int k;

	if (buf == NULL)
		return 1L;

	s1 = adler & 0xffff;
	s2 = (adler >> 16) & 0xffff;

	while (len > 0) {
		/* Take at most NMAX bytes before reducing the sums */
		k = len < NMAX ? len : NMAX;
		len -= k;

		/* Unrolled part: 16 bytes per step */
		for (; k >= 16; k -= 16, buf += 16) {
			DO16(buf);
		}
		/* Remaining 0..15 bytes of this portion */
		for (; k != 0; k--) {
			s1 += *buf++;
			s2 += s1;
		}

		s1 %= BASE;
		s2 %= BASE;
	}
	return (s2 << 16) | s1;
}
/*
 * Reverse version of adler32 (provided by Jorn Engel).
 *
 * The buffer is consumed starting from its end: whole 16-byte groups
 * are taken from the back (the bytes inside one group are still summed
 * in ascending order by DO16()), and the remaining bytes are taken one
 * at a time, last byte first.
 *
 * adler - starting value: low 16 bits seed s1, next 16 bits seed s2
 * buf   - data to checksum; if NULL the function returns 1
 * len   - number of bytes in buf
 *
 * Bytes are read as unsigned char, like adler32() does, so the result
 * does not depend on whether plain char is signed on the target.
 *
 * Returns (s2 << 16) | s1 after processing all bytes.
 */
static uint32_t
adler32r(uint32_t adler, const char *buf, size_t len)
{
	const unsigned char *p;
	unsigned long s1 = adler & 0xffff;
	unsigned long s2 = (adler >> 16) & 0xffff;
	int k;

	if (!buf)
		return 1;

	/* Start one past the last byte and walk towards the beginning */
	p = (const unsigned char *)buf + len;
	while (len > 0) {
		/* Take at most NMAX bytes before reducing the sums */
		k = len < NMAX ? len : NMAX;
		len -= k;
		while (k >= 16) {
			p -= 16;
			DO16(p);
			k -= 16;
		}
		if (k != 0)
			do {
				s1 += *--p;
				s2 += s1;
			} while (--k);
		s1 %= BASE;
		s2 %= BASE;
	}
	return (s2 << 16) | s1;
}
/*
 * Jorn Engel's algorithms.
 *
 * engel32() keeps two 32-bit accumulators, both seeded with 'engel':
 * a running sum of the bytes and a running sum of those sums. After
 * the last byte the bit order of the byte sum is reversed and XOR-ed
 * into the second accumulator, which is the return value.
 *
 * engel - seed for both accumulators
 * _s    - data to checksum, read through a plain char pointer
 * len   - number of bytes at _s
 */
static uint32_t
engel32(uint32_t engel, const void *_s, size_t len)
{
	const char *p = _s;
	uint32_t acc = engel;
	uint32_t mix = engel;
	size_t quads = len / 4;
	size_t rest = len % 4;

	/* Main part, unrolled four bytes per step */
	while (quads-- > 0) {
		acc += p[0];
		mix += acc;
		acc += p[1];
		mix += acc;
		acc += p[2];
		mix += acc;
		acc += p[3];
		mix += acc;
		p += 4;
	}

	/* Up to three trailing bytes */
	while (rest-- > 0) {
		acc += *p++;
		mix += acc;
	}

	/* Reverse the 32 bits of acc: halves, bytes, nibbles, pairs, bits */
	acc = (acc << 16) ^ (acc >> 16);
	acc = ((acc & 0x00ff00ff) << 8) ^ ((acc & 0xff00ff00) >> 8);
	acc = ((acc & 0x0f0f0f0f) << 4) ^ ((acc & 0xf0f0f0f0) >> 4);
	acc = ((acc & 0x33333333) << 2) ^ ((acc & 0xcccccccc) >> 2);
	acc = ((acc & 0x55555555) << 1) ^ ((acc & 0xaaaaaaaa) >> 1);

	return mix ^ acc;
}
/*
 * Reverse version of engel32(): the same two-accumulator scheme, but
 * the bytes are consumed from the last one down to the first one, so
 * the result equals engel32() applied to the byte-reversed buffer.
 *
 * engel - seed for both accumulators
 * _s    - data to checksum, read through a plain char pointer
 * len   - number of bytes at _s
 *
 * The base pointer is never advanced; 'len' alone tracks how many
 * bytes at the front are still unprocessed, so s[len-1] is always the
 * next byte to take and no access falls outside s[0..len-1].
 */
static uint32_t
engel32r(uint32_t engel, const void *_s, size_t len)
{
	const char *s = _s;
	uint32_t sum = engel, prod = engel;

	/* Main part, unrolled four bytes per step, walking backwards */
	for (; len >= 4; len -= 4) {
		sum += s[len-1];
		prod += sum;
		sum += s[len-2];
		prod += sum;
		sum += s[len-3];
		prod += sum;
		sum += s[len-4];
		prod += sum;
	}

	/* Up to three leading bytes that are left, still back to front */
	for (; len; len--) {
		sum += s[len-1];
		prod += sum;
	}

	/* Reverse the 32 bits of sum: halves, bytes, nibbles, pairs, bits */
	sum = (sum&0x0000ffff)<<16 ^ (sum&0xffff0000)>>16;
	sum = (sum&0x00ff00ff)<<8 ^ (sum&0xff00ff00)>>8;
	sum = (sum&0x0f0f0f0f)<<4 ^ (sum&0xf0f0f0f0)>>4;
	sum = (sum&0x33333333)<<2 ^ (sum&0xcccccccc)>>2;
	sum = (sum&0x55555555)<<1 ^ (sum&0xaaaaaaaa)>>1;
	prod ^= sum;
	return prod;
}
/*MODULE_VERSION("1.5");
MODULE_LICENSE ("GPL");
MODULE_AUTHOR ("Artem B. Bityuckiy");
MODULE_DESCRIPTION ("The CRC test");*/
More information about the linux-mtd
mailing list