Thiết kế website giá rẻ

Question

I am working with POSIX regex functions to perform string matching in my C program. I have set up the matching rules and initialized them properly. Within my make_token function, I am invoking the regexec function to sequentially check if the incoming characters match the specified patterns.

/*
 * Token-matching rules: each entry pairs a POSIX extended regular expression
 * with the token type recorded when that expression matches.
 *
 * make_token() tries the entries in array order and stops at the first one
 * that matches at the current position, so "0x[0-9a-fA-F]+" must stay ahead
 * of "[0-9]+" (otherwise the leading "0" of a hex literal would be taken as
 * a decimal number).
 *
 * ERE operators that must match literally (+ * ( ) $) need a backslash in
 * the regex, which is written as "\\" in a C string literal. '-' and '/' are
 * ordinary characters outside a bracket expression and need no escape.
 */
static struct rule {
  const char *regex;  // pattern source handed to regcomp()
  int token_type;     // token type recorded for a match
} rules[] = {
  {" +", TK_NOTYPE},              // spaces
  {"\\+", '+'},                   // plus
  {"-", '-'},                     // minus
  {"\\*", '*'},                   // multiply or TK_DEREF
  {"/", '/'},                     // divide
  {"\\(", '('},
  {"\\)", ')'},
  {"==", TK_EQ},
  {"!=", TK_NEQ},
  {"&&", TK_AND},
  {"\\$[a-zA-Z0-9]+", TK_REG},    // literal '$' followed by a name
  {"0x[0-9a-fA-F]+", TK_HEX},
  {"[0-9]+", TK_DEC},
};
// Loop bound used when walking rules[] and re[] together.
// NOTE(review): ARRLEN is defined elsewhere; presumably the element count of its argument -- confirm.
#define NR_REGEX ARRLEN(rules)
// re[i] receives the compiled form of rules[i].regex in init_regex().
static regex_t re[NR_REGEX] = {};
/*
 * Compile every pattern in rules[] into the corresponding slot of re[],
 * using POSIX extended syntax (REG_EXTENDED).
 *
 * If any pattern fails to compile, the regerror() message and the offending
 * pattern are passed to panic().
 */
void init_regex(void) {
  char error_msg[128];

  for (int i = 0; i < NR_REGEX; i ++) {
    int ret = regcomp(&re[i], rules[i].regex, REG_EXTENDED);
    if (ret != 0) {
      /* Size the message from the buffer itself so the two cannot drift apart. */
      regerror(ret, &re[i], error_msg, sizeof(error_msg));
      panic("regex compilation failed: %s\n%s", error_msg, rules[i].regex);
    }
  }
}

// One recognized token: its type plus a copy of the text that matched.
typedef struct token {
  int type;      // set by make_token() from rules[i].token_type
  char str[32];  // matched text copied by make_token(), followed by a terminator
} Token;

// Tokens recorded by make_token() for the expression being scanned.
static Token tokens[32] __attribute__((used)) = {};
// Number of entries recorded in tokens[]; make_token() resets it to 0 on entry.
static int nr_token __attribute__((used))  = 0;

/*
 * Split the NUL-terminated expression `e` into tokens, filling tokens[] and
 * setting nr_token to the number of tokens recorded.
 *
 * At each position the compiled rules are tried in order; the first rule
 * whose match starts exactly at that position wins and its matched text is
 * consumed. TK_NOTYPE matches (spaces) are consumed without being recorded.
 *
 * Returns true when the whole string was tokenized. Returns false when
 *   - no rule matches at some position,
 *   - a token's text does not fit in Token.str (including its terminator), or
 *   - the expression holds more tokens than tokens[] has slots.
 */
static bool make_token(char *e) {
  int position = 0;
  int i;
  regmatch_t pmatch;

  nr_token = 0;

  while (e[position] != '\0') {
    /* Try all rules one by one. */
    for (i = 0; i < NR_REGEX; i ++) {
      /* Only accept a match anchored at the current position. */
      if (regexec(&re[i], e + position, 1, &pmatch, 0) != 0 || pmatch.rm_so != 0) {
        continue;
      }

      char *substr_start = e + position;
      int substr_len = pmatch.rm_eo;
      int type = rules[i].token_type;

      // Log("match rules[%d] = \"%s\" at position %d with len %d: %.*s",
      //     i, rules[i].regex, position, substr_len, substr_len, substr_start);

      position += substr_len;

      if (type != TK_NOTYPE) {
        /* Check capacity BEFORE writing so tokens[] is never indexed past its end. */
        if (nr_token >= (int)(sizeof(tokens) / sizeof(tokens[0]))) {
          Log("The token number of the input expression is too large!");
          return false;
        }

        /* Every recorded token needs room for its text plus the terminator. */
        if (substr_len >= (int)sizeof(tokens[nr_token].str)) {
          if (type == TK_DEC || type == TK_HEX) {
            Log("The input number is too long!");
          } else {
            Log("The input token is too long!");
          }
          return false;
        }

        tokens[nr_token].type = type;
        strncpy(tokens[nr_token].str, substr_start, substr_len);
        tokens[nr_token].str[substr_len] = '\0';
        ++nr_token;
      }
      break;
    }

    if (i == NR_REGEX) {
      /* Echo the input and put a caret under the offending column. */
      printf("no match at position %d\n%s\n%*.s^\n", position, e, position, "");
      return false;
    }
  } // end while
  return true;
}

Behavior Without AddressSanitizer:
When I compile and run my program without AddressSanitizer, it executes successfully and performs the string matching as expected.

Behavior With AddressSanitizer:
Upon enabling AddressSanitizer, my program throws a heap-buffer-overflow error. Debugging with gdb revealed that the error is triggered during the execution of the regexec function.

**Specific Error Message:**

==14631==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x602000003a16 at pc 0x7f017febcfd9 bp 0x7ffd16c98e30 sp 0x7ffd16c985d8
READ of size 5 at 0x602000003a16 thread T0
    #0 0x7f017febcfd8 in __interceptor_regexec ../../../../src/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:7756
    #1 0x55c052b5a2af in make_token src/monitor/sdb/expr.c:103
    #2 0x55c052b5b304 in expr src/monitor/sdb/expr.c:252
    #3 0x55c052b5b8b2 in cmd_p src/monitor/sdb/sdb.c:135
    #4 0x55c052b5bead in sdb_mainloop src/monitor/sdb/sdb.c:247
    #5 0x55c052b5dff1 in engine_start src/engine/interpreter/init.c:25
    #6 0x55c052b572df in main src/nemu-main.c:37
    #7 0x7f017a8d3d8f in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58
    #8 0x7f017a8d3e3f in __libc_start_main_impl ../csu/libc-start.c:392
    #9 0x55c052b57204 in _start (/home/xyr/ics2023/nemu/build/riscv32-nemu-interpreter+0xc204)

0x602000003a16 is located 0 bytes to the right of 6-byte region [0x602000003a10,0x602000003a16)
allocated by thread T0 here:
    #0 0x7f017ff1f887 in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145
    #1 0x7f017fe50bac in xmalloc (/lib/x86_64-linux-gnu/libreadline.so.8+0x39bac)

SUMMARY: AddressSanitizer: heap-buffer-overflow ../../../../src/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:7756 in __interceptor_regexec
Shadow bytes around the buggy address:
  0x0c047fff86f0: fa fa fd fa fa fa fd fa fa fa fd fa fa fa fd fa
  0x0c047fff8700: fa fa fd fa fa fa fd fd fa fa fd fd fa fa fd fd
  0x0c047fff8710: fa fa fd fd fa fa fd fd fa fa fd fd fa fa fd fd
  0x0c047fff8720: fa fa fd fd fa fa fd fd fa fa fd fd fa fa fd fd
  0x0c047fff8730: fa fa fd fd fa fa fd fd fa fa fd fd fa fa 00 00
=>0x0c047fff8740: fa fa[06]fa fa fa 00 04 fa fa 06 fa fa fa fa fa
  0x0c047fff8750: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x0c047fff8760: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x0c047fff8770: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x0c047fff8780: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x0c047fff8790: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
Shadow byte legend (one shadow byte represents 8 application bytes):
  Addressable:           00
  Partially addressable: 01 02 03 04 05 06 07 
  Heap left redzone:       fa
  Freed heap region:       fd
  Stack left redzone:      f1
  Stack mid redzone:       f2
  Stack right redzone:     f3
  Stack after return:      f5
  Stack use after scope:   f8
  Global redzone:          f9
  Global init order:       f6
  Poisoned by user:        f7
  Container overflow:      fc
  Array cookie:            ac
  Intra object redzone:    bb
  ASan internal:           fe
  Left alloca redzone:     ca
  Right alloca redzone:    cb
  Shadow gap:              cc
==14631==ABORTING

I am seeking insight into why the heap-buffer-overflow error might be triggered when regexec is called, despite the program working fine without AddressSanitizer. Any assistance you could provide would be greatly appreciated.

Environment:
Ubuntu 22.04.4 LTS
gcc 11.4.0

Thiết kế website giá rẻ

Danh mục

Heap-Buffer-Overflow Error When Using POSIX Regex Functions with AddressSanitizer