/*
 * Time mprotect()
 * gcc -Wall -Wextra -O1 mmap_mprotect.c -o mmap_mprotect
 * DEBUG: gcc -Wall -Wextra -g -fsanitize=undefined -O1 mmap_mprotect.c -o mmap_mprotect
 * ./mmap_mprotect <map-len-in-kiB> <regionCnt>
 */

#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

// Byte type used to touch the mapped memory. It is volatile-qualified,
// presumably so the per-page writes in main() are not elided by the compiler.
typedef volatile char  vchar_t;
// File-scope pointer-to-pointer. Not referenced by any code visible in this
// file; main() works through a local pointer to the mapping instead.
vchar_t   **ts_map;

// static long long  mmap_len = 1 * 130 * 1024;
// static long long  mmap_len;

// Forward declarations of the time-stamp-counter helpers defined after main().
// main() calls start_clock() immediately before the timed mprotect() loop and
// stop_clock() immediately after it; both return a 64-bit counter value.
static __inline__ u_int64_t start_clock();
static __inline__ u_int64_t stop_clock();

int main(int argc, char **argv)
{
    int i, ret;
    long long int j;
    if (argc < 3) {
       printf("USAGE: %s map-len-in-kiB regionCnt\n", argv[0]);
       return 1;
    }

    // map region length
    //
    int mmap_len = 1024LL * atoi(argv[1]);
    int pagesize = sysconf(_SC_PAGESIZE);
    int prot;

    // loop iteration count - map regions to create
    //
    long int iterations = atol(argv[2]);

    // Pointer to mmap region
    //
    vchar_t* ts_map;
    ts_map = (vchar_t *) mmap(NULL, mmap_len * iterations,
                  PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0L);

    if (ts_map == MAP_FAILED) {
      perror(" mmap");
      exit(1);
    }

    // Do a write per page
    for (j = 0; j < mmap_len * iterations; j += pagesize) {
      ts_map[j] = 1;
    }

     // Time the mprotect calls -alternate protection between PROT_NONE and PROT_READ | PROT_WRITE
     //
     u_int64_t start_rdtsc = start_clock();

     for (i = 0; i < iterations; i++) {
       if (i % 2 == 0 ) {
         prot = PROT_NONE;
       } else {
         prot = PROT_READ | PROT_WRITE;
       }
       ret =mprotect((void *)ts_map + i*mmap_len, mmap_len, prot);
       if (ret != 0) {
         perror("mprotect");
         printf(" mmap error at iteration %d from %ld\n", i, iterations);
       }
     }

     u_int64_t stop_rdtsc = stop_clock();
     u_int64_t diff = stop_rdtsc-start_rdtsc;

     printf("TSC for %ld mprotect calls with len of %lldkiB: %ld K-cycles.  Avg: %g K-cycles/call\n",
            iterations,
            mmap_len/(1024LL),
            diff/1000,
            ((double)(diff)/(double)(iterations))/1000.0);

     return 0;
}

/*
 * Read the x86 time-stamp counter at the start of a timed section.
 *
 * Executes CPUID followed by RDTSC, then copies EDX into hi and EAX into lo.
 * The sequence follows the Intel white paper cited below, which uses CPUID
 * as a serializing instruction ahead of the counter read.
 *
 * Returns the 64-bit value ((u_int64_t)hi << 32) | lo.
 * Requires x86-64 and a compiler that accepts GCC-style inline asm.
 */
static __inline__ u_int64_t start_clock() {
    // See: Intel Doc #324264, "How to Benchmark Code Execution Times on Intel...",
    u_int32_t hi, lo;
    // rax/rbx/rcx/rdx are declared clobbered, so the two outputs are moved
    // into other compiler-chosen registers ("=r") inside the asm itself.
    // NOTE(review): EAX is not set before CPUID, so the queried leaf is
    // whatever EAX happened to hold - confirm this is acceptable.
    // NOTE(review): there is no "memory" clobber - confirm the compiler
    // cannot move memory accesses of the timed code across this statement.
    __asm__ __volatile__ (
        "CPUID\n\t"
        "RDTSC\n\t"
        "mov %%edx, %0\n\t"
        "mov %%eax, %1\n\t": "=r" (hi), "=r" (lo)::
        "%rax", "%rbx", "%rcx", "%rdx");
    // Combine the two 32-bit halves into one 64-bit counter value.
    return ( (u_int64_t)lo) | ( ((u_int64_t)hi) << 32);
}

/*
 * Read the x86 time-stamp counter at the end of a timed section.
 *
 * Executes RDTSCP, copies EDX into hi and EAX into lo, and only then
 * executes CPUID. The sequence follows the Intel white paper cited below,
 * where the trailing CPUID acts as a serializing instruction after the
 * counter has been read and saved.
 *
 * Returns the 64-bit value ((u_int64_t)hi << 32) | lo.
 * Requires x86-64 with RDTSCP support and GCC-style inline asm.
 */
static __inline__ u_int64_t stop_clock() {
    // See: Intel Doc #324264, "How to Benchmark Code Execution Times on Intel...",
    u_int32_t hi, lo;
    // The counter halves are copied out before CPUID runs because CPUID
    // overwrites EAX/EBX/ECX/EDX; all four are declared clobbered, so the
    // outputs live in other compiler-chosen registers ("=r").
    // NOTE(review): there is no "memory" clobber - confirm the compiler
    // cannot move memory accesses of the timed code across this statement.
    __asm__ __volatile__(
        "RDTSCP\n\t"
        "mov %%edx, %0\n\t"
        "mov %%eax, %1\n\t"
        "CPUID\n\t": "=r" (hi), "=r" (lo)::
        "%rax", "%rbx", "%rcx", "%rdx");
    // Combine the two 32-bit halves into one 64-bit counter value.
    return ( (u_int64_t)lo) | ( ((u_int64_t)hi) << 32);
}
