/** * Interpreter for Malbolge Unshackled. * * Maximum rotation width is UINTMAX_MAX, which should be at least 2^64-1. * Malbolge Unshackled uses Unicode for I/O. This interpreter uses UTF-8 * encoding when the program reads from stdin or writes to stdout. * * Please compile with -O3 flag. * * 2017 Matthias Lutter. * Please visit * * To the extent possible under law, the author has dedicated all copyright * and related and neighboring rights to this software to the public domain * worldwide. This software is distributed without any warranty. * * See . */ #include #include #include #include #include #define T0 0 #define T1 1 #define T2 2 typedef struct Trits { int_fast8_t trit; struct Trits* left; struct Trits* right; } Trits; struct MemCell; typedef struct Number { int_fast8_t head; uintmax_t width; Trits* tail; struct MemCell* memptr; // pointer to memory[Number]; 0: to be computed int32_t unicode; // unicode codepoint of number; -1: not an unicode codepoint; -2: to be computed } Number; typedef struct MemCell { Number* val; struct MemCell* next; // pointer to next memory cell (to save computation time) } MemCell; typedef struct MemoryTree { MemCell* cell; struct MemoryTree* child[3]; } MemoryTree; static inline void* malloc_or_die(size_t size) { void* mem = malloc(size); if (!mem) { fprintf(stderr,"out of memory"); exit(1); } return mem; } // step: fixed value between 4 and 12; slack: fixed value between 0 and 5 static inline uintmax_t det_growth_policy(uintmax_t new_wordwidth, uintmax_t old_rotwidth, uintmax_t step, uintmax_t slack) { uintmax_t ret = old_rotwidth; if (new_wordwidth > (old_rotwidth - slack)/2) { if (old_rotwidth > UINTMAX_MAX-step) { fprintf(stderr,"maximal supported rotation width exceeded\n"); exit(1); } ret = old_rotwidth + step; if (new_wordwidth > UINTMAX_MAX/2) { fprintf(stderr,"maximal supported rotation width exceeded\n"); exit(1); } uintmax_t alt = 2*new_wordwidth; if (alt > ret) { ret = alt; } } return ret; } // prob: fixed value between 0.2*RAND_MAX and 0.8*RAND_MAX; slack: fixed value between 0 and 5 static inline uintmax_t nondet_growth_policy(uintmax_t new_wordwidth, uintmax_t old_rotwidth, uintmax_t prob, uintmax_t slack) { uintmax_t ret = old_rotwidth; int change = 0; if (new_wordwidth > old_rotwidth/2) { change = 1; } if (rand() <= prob) { change = 1; } if (change) { if (new_wordwidth > UINTMAX_MAX/2) { fprintf(stderr,"maximal supported rotation width exceeded\n"); exit(1); } if (2*new_wordwidth > ret) { ret = 2*new_wordwidth; } uintmax_t rnd = rand() % (slack+1); if (ret > UINTMAX_MAX-rnd) { fprintf(stderr,"maximal supported rotation width exceeded\n"); exit(1); } ret += rnd; } return ret; } static inline void update_memptr(Number* n, MemoryTree m[]) { if (n->memptr) return; MemoryTree* cur_node = &m[n->head]; MemCell* last_match = cur_node->cell; Trits* it = n->tail; for (uintmax_t i=0; iwidth; i++) { if (cur_node->child[it->trit]) { cur_node = cur_node->child[it->trit]; last_match = cur_node->cell; }else { cur_node->child[it->trit] = (MemoryTree*)malloc_or_die(sizeof(MemoryTree)); cur_node = cur_node->child[it->trit]; if (it->trit == n->head) { cur_node->cell = last_match; }else{ cur_node->cell = (MemCell*)malloc_or_die(sizeof(MemCell)); cur_node->cell->val = 0; cur_node->cell->next = 0; } cur_node->child[0] = 0; cur_node->child[1] = 0; cur_node->child[2] = 0; last_match = cur_node->cell; } it = it->left; } n->memptr = last_match; } /* void print_number(FILE* f, Number* n) { fprintf(f,"...%c%c",'0'+(char)n->head,'0'+(char)n->head); int printed = 0; Trits* it = n->tail->right; for (uintmax_t i=0; iwidth; i++) { if (printed || it->trit != n->head) { fprintf(f,"%c",'0'+(char)it->trit); printed = 1; } it = it->right; } } */ static inline int is_nl(Number* n) { if (n->head != T2) { return 0; } Trits* it = n->tail->right; for (uintmax_t i=0; iwidth-1; i++) { if (it->trit != T2) { return 0; } it = it->right; } if (it->trit != T1) { return 0; } return 1; } static inline Number* clone_number(Number* in){ Number* n = (Number*)malloc_or_die(sizeof(Number)); n->head = in->head; n->width = in->width; n->memptr = in->memptr; n->unicode = in->unicode; n->tail = (Trits*)malloc_or_die(sizeof(Trits)); n->tail->left = n->tail; n->tail->right = n->tail; Trits* it = n->tail; Trits* in_it = in->tail; for (uintmax_t i=0;iwidth;i++) { it->trit = in_it->trit; if (i==in->width-1) { it->left = n->tail; }else{ it->left = (Trits*)malloc_or_die(sizeof(Trits)); } it->left->right = it; it = it->left; in_it = in_it->left; } return n; } static inline void copy_number(Number* n, Number* in) { // clear old trit sequence Trits* it = n->tail; for (uintmax_t i=0; iwidth; i++) { Trits* tmp = it; it = it->left; free(tmp); } n->head = in->head; n->width = in->width; n->memptr = in->memptr; n->unicode = in->unicode; n->tail = (Trits*)malloc_or_die(sizeof(Trits)); n->tail->left = n->tail; n->tail->right = n->tail; it = n->tail; Trits* in_it = in->tail; for (uintmax_t i=0;iwidth;i++) { it->trit = in_it->trit; if (i==in->width-1) { it->left = n->tail; }else{ it->left = (Trits*)malloc_or_die(sizeof(Trits)); } it->left->right = it; it = it->left; in_it = in_it->left; } } static inline void free_number(Number** ptr) { if (!ptr) return; Number* n = *ptr; Trits* it = n->tail; for (uintmax_t i=0; iwidth; i++) { Trits* tmp = it; it = it->left; free(tmp); } free(n); (*ptr) = 0; } static inline void update_unicode(Number* n) { if (n->unicode != -2) { return; // no update needed } if (n->head != T0) { n->unicode = -1; return; } int32_t unicode = 0; int32_t factor = 1; Trits* it = n->tail; for (uintmax_t i=0; iwidth; i++) { unicode += factor * it->trit; if (factor < 0x110000) { factor *= 3; } if (unicode >= 0x110000) { n->unicode = -1; return; } it = it->left; } n->unicode = unicode; } // unicode-character to Number* static inline Number* to_number(int32_t symbol) { if (symbol < 0) { fprintf(stderr,"internal error: unexpected negative value\n"); exit(1); } Number* n = (Number*)malloc_or_die(sizeof(Number)); n->head = T0; n->width = 1; n->memptr = 0; // to be computed n->unicode = (symbol<0x110000?symbol:-1); n->tail = (Trits*)malloc_or_die(sizeof(Trits)); n->tail->left = n->tail; n->tail->right = n->tail; n->tail->trit = symbol % 3; Trits* it = n->tail; while (symbol /= 3) { it->left = (Trits*)malloc_or_die(sizeof(Trits)); it->left->right = it; it = it->left; it->trit = symbol % 3; n->width++; } it->left = n->tail; n->tail->right = it; return n; } static inline Number* nl() { Number* n = (Number*)malloc_or_die(sizeof(Number)); n->head = T2; n->width = 1; n->memptr = 0; // to be computed n->unicode = -1; // no unicode character n->tail = (Trits*)malloc_or_die(sizeof(Trits)); n->tail->left = n->tail; n->tail->right = n->tail; n->tail->trit = T1; return n; } static inline Number* eof() { Number* n = (Number*)malloc_or_die(sizeof(Number)); n->head = T2; n->width = 1; n->memptr = 0; // to be computed n->unicode = -1; // no unicode character n->tail = (Trits*)malloc_or_die(sizeof(Trits)); n->tail->left = n->tail; n->tail->right = n->tail; n->tail->trit = T2; return n; } // correct values only for modul >= 2 and modul <= 29524 static inline int mod(Number* n, int modul) { int result = (29524 % modul) * (int)n->head; Trits* it = n->tail; int position = 1; for (uintmax_t i = 0; iwidth; i++) { result += position * (((int)it->trit)+(modul-(int)n->head)); result %= modul; position *= 3; position %= modul; it = it->left; } return result; } static inline void increment(Number* n) { Trits* it = n->tail; if (n->unicode >= 0 && n->unicode < 0x110000-1) { n->unicode++; }else{ n->unicode = -2; } if (n->memptr) { n->memptr = n->memptr->next; } for (uintmax_t i=0; iwidth; i++) { it->trit++; it->trit %= 3; if (it->trit != 0) return; it = it->left; } if (n->head == T2) { n->head = T0; return; } it = it->right; it->left = (Trits*)malloc_or_die(sizeof(Trits)); it->left->right = it; it = it->left; it->trit = n->head + 1; it->left = n->tail; n->tail->right = it; n->width++; } static inline void xlat2(Number* n) { update_unicode(n); if (n->unicode < 33 || n->unicode > 126) { fprintf(stderr,"cannot apply xlat2\n"); exit(1); } const char* xlat2 = "5z]&gqtyfr$(we4{WP)H-Zn,[%\\3dL+Q;>U!pJS72FhOA1C" \ "B6v^=I_0/8|jsb9m<.TVac`uY*MK'X~xDl}REokN:#?G\"i@"; n->unicode = (int32_t)((unsigned char)xlat2[(n->unicode-33)%94]); // clear old trit sequence Trits* it = n->tail; for (uintmax_t i=0; iwidth; i++) { Trits* tmp = it; it = it->left; free(tmp); } n->tail = 0; n->width = 0; } // this is not done automatically to increase speed static inline void repair_number_after_xlat2(Number* n) { if (n->tail != 0 && n->width != 0) { return; } if (n->unicode < 0) { return; } n->width = 1; // create new trit sequence int32_t symbol = n->unicode; n->tail = (Trits*)malloc_or_die(sizeof(Trits)); n->tail->left = n->tail; n->tail->right = n->tail; n->tail->trit = symbol % 3; Trits* it = n->tail; while (symbol /= 3) { it->left = (Trits*)malloc_or_die(sizeof(Trits)); it->left->right = it; it = it->left; it->trit = symbol % 3; n->width++; } it->left = n->tail; n->tail->right = it; n->memptr = 0; } static inline void rotate_r(Number* n, uintmax_t rotwidth) { while (n->width < rotwidth) { Trits* tmp = n->tail->right; n->tail->right = (Trits*)malloc_or_die(sizeof(Trits)); n->tail->right->right = tmp; tmp->left = n->tail->right; n->tail->right->left = n->tail; n->tail->right->trit = n->head; n->width++; } n->tail = n->tail->left; n->memptr = 0; // to be computed n->unicode = -2; // to be computed } static inline uintmax_t get_real_width(Number* n) { uintmax_t real_width = 0; Trits* it = n->tail; for (uintmax_t i=0; iwidth; i++) { if (it->trit != n->head) { real_width = i+1; } it = it->left; } return real_width; } static inline void opr(Number* a, Number* d) { Trits* it_a = a->tail; Trits* it_d = d->tail; const int_fast8_t OPR[] = { 1,0,0, 1,0,2, 2,2,1}; uintmax_t pos = 0; while (pos < a->width || pos < d->width) { it_a->trit = (it_d->trit = OPR[(it_a->trit%3) + 3*(it_d->trit%3)]); pos++; if (pos >= a->width && pos < d->width) { // insert into a it_a->left = (Trits*)malloc_or_die(sizeof(Trits)); it_a->left->right = it_a; it_a = it_a->left; it_a->trit = a->head; it_a->left = a->tail; a->tail->right = it_a; a->width++; }else{ it_a = it_a->left; } if (pos >= d->width && pos < a->width) { // insert into d it_d->left = (Trits*)malloc_or_die(sizeof(Trits)); it_d->left->right = it_d; it_d = it_d->left; it_d->trit = d->head; it_d->left = d->tail; d->tail->right = it_d; d->width++; }else{ it_d = it_d->left; } } a->head = (d->head = OPR[(a->head%3) + 3*(d->head%3)]); a->memptr = 0; // to be computed a->unicode = -2; // to be computed d->memptr = 0; // to be computed d->unicode = -2; // to be computed } // returns unicode code point or -1 on EOF; exit(1) on error static inline int32_t read_utf8_character() { int32_t in = (int32_t)getchar(); if (in == EOF) { return -1; } if ((in & 0x80) == 0) { return in; } if ((in & 0xE0) == 0xC0) { int in2 = getchar(); if (in2 == EOF || (in2 & 0xC0) != 0x80) { fprintf(stderr,"invalid utf-8 encoding while reading from stdin\n"); exit(1); } return (((in & 0x1F) << 6) | (in2 & 0x3F)); } if ((in & 0xF0) == 0xE0) { int32_t in2 = (int32_t)getchar(); if (in2 == EOF || (in2 & 0xC0) != 0x80) { fprintf(stderr,"invalid utf-8 encoding while reading from stdin\n"); exit(1); } int32_t in3 = (int32_t)getchar(); if (in3 == EOF || (in3 & 0xC0) != 0x80) { fprintf(stderr,"invalid utf-8 encoding while reading from stdin\n"); exit(1); } return (((in & 0x0F) << 12) | ((in2 & 0x3F) << 6) | (in3 & 0x3F)); } if ((in & 0xF8) == 0xF0) { int32_t in2 = (int32_t)getchar(); if (in2 == EOF || (in2 & 0xC0) != 0x80) { fprintf(stderr,"invalid utf-8 encoding while reading from stdin\n"); exit(1); } int32_t in3 = (int32_t)getchar(); if (in3 == EOF || (in3 & 0xC0) != 0x80) { fprintf(stderr,"invalid utf-8 encoding while reading from stdin\n"); exit(1); } int32_t in4 = (int32_t)getchar(); if (in4 == EOF || (in4 & 0xC0) != 0x80) { fprintf(stderr,"invalid utf-8 encoding while reading from stdin\n"); exit(1); } return (((in & 0x07) << 18) | ((in2 & 0x3F) << 12) | ((in3 & 0x3F) << 6) | (in4 & 0x3F)); } fprintf(stderr,"invalid utf-8 encoding while reading from stdin\n"); exit(1); } static inline void print_utf8(int32_t symbol) { if (symbol < 0 || symbol >= 0x110000) { fprintf(stderr,"invalid unicode codepoint\n"); exit(1); } if (symbol < 0x80) { printf("%c",(char)symbol); return; } if (symbol < 0x800) { char first = (0xC0 | (symbol >> 6)); char second = (0x80 | (symbol & 0x3F)); printf("%c%c",first,second); return; } if (symbol < 0x800) { char first = (0xC0 | (symbol >> 6)); char second = (0x80 | (symbol & 0x3F)); printf("%c%c",first,second); return; } if (symbol < 0x10000) { char first = (0xE0 | (symbol >> 12)); char second = (0x80 | ((symbol >> 6) & 0x3F)); char third = (0x80 | (symbol & 0x3F)); printf("%c%c%c",first,second,third); return; } char first = (0xF0 | (symbol >> 18)); char second = (0x80 | ((symbol >> 12) & 0x3F)); char third = (0x80 | ((symbol >> 6) & 0x3F)); char fourth = (0x80 | (symbol & 0x3F)); printf("%c%c%c%c",first,second,third,fourth); } int main(int argc, char* argv[]) { Number* initial_values[6]; MemoryTree memory[3] = {{0,{0,0,0}},{0,{0,0,0}},{0,{0,0,0}}}; for (int_fast8_t i=0;i<3;i++) { memory[i].cell = (MemCell*)malloc_or_die(sizeof(MemCell)); memory[i].cell->val = 0; memory[i].cell->next = 0; } Number* a = to_number(0); Number* c = to_number(0); Number* d = to_number(0); uintmax_t max_wordwidth = 0; srand(time(NULL)); uintmax_t rotwidth = 10 + rand()%6; uintmax_t growth_slack = rand() % 6; uintmax_t growth_step = 4 + rand() % 9; uintmax_t growth_prob; do { growth_prob = rand(); } while (growth_prob < RAND_MAX/5 || growth_prob/4 > RAND_MAX/5); int det_growth = rand()%2; unsigned int result; FILE* file; if (argc < 2) { // read program code from STDIN file = stdin; }else{ file = fopen(argv[1],"rb"); } if (file == NULL) { fprintf(stderr, "file not found: %s\n",argv[1]); return 1; } result = 0; Number* init = to_number(0); MemCell* prev = 0; MemCell* prevprev = 0; update_memptr(init,memory); int pos = 0; while (!feof(file)){ int instr; char val; result = fread(&val,1,1,file); if (result > 1) return 1; if (result == 0) { if (feof(file)) break; else { fprintf(stderr, "error: input error\n"); return 1; } } instr = ((int)val+pos)%94; if (val == ' ' || val == '\t' || val == '\r' || val == '\n'); else if (val >= 33 && val < 127 && (instr == 4 || instr == 5 || instr == 23 || instr == 39 || instr == 40 || instr == 62 || instr == 68 || instr == 81)) { init->memptr->val = to_number(val); prevprev = prev; prev = init->memptr; increment(init); update_memptr(init,memory); if (!prev->next) { prev->next = init->memptr; } pos++; pos%=564; }else{ fprintf(stderr, "invalid character\n"); return 1; //invalid characters are not accepted. } } if (file != stdin) { fclose(file); } if (!prevprev) { fprintf(stderr, "error: not a valid Malbolge program\n"); return 1; } pos %= 6; for (; pos < 18; pos++) { Number* m1 = clone_number(prev->val); Number* m2 = clone_number(prevprev->val); opr(m1, m2); if (pos < 12) { free_number(&m2); }else{ update_unicode(m2); update_memptr(m2,memory); initial_values[pos-12] = m2; } update_unicode(m1); init->memptr->val = m1; prevprev = prev; prev = init->memptr; increment(init); update_memptr(init,memory); if (!prev->next) { prev->next = init->memptr; } } free_number(&init); pos = 0; update_memptr(c,memory); update_memptr(d,memory); int step = 1; while (1) { if (!c->memptr->val) { c->memptr->val = clone_number(initial_values[pos%6]); } update_unicode(c->memptr->val); if (c->memptr->val->unicode < 33 || c->memptr->val->unicode > 126) { fprintf(stderr,"error: invalid instruction in step %d\n",step); return 1; } switch ((c->memptr->val->unicode+pos)%94) { case 4: // jmp if (!d->memptr->val) { copy_number(c, initial_values[mod(d,6)]); }else{ repair_number_after_xlat2(d->memptr->val); update_memptr(d->memptr->val,memory); copy_number(c, d->memptr->val); } update_memptr(c,memory); pos = mod(c,564); if (!c->memptr->val) { c->memptr->val = clone_number(initial_values[pos%6]); } break; case 5: // out // compare A with ...21 if (is_nl(a)) { printf("\n"); }else{ update_unicode(a); print_utf8(a->unicode); } break; case 23: // in { int32_t in = read_utf8_character(); free_number(&a); if (in == -1) { a = to_number(2); a->head = T2; a->unicode = -1; }else if (in == '\n') { a = to_number(1); a->head = T2; }else{ a = to_number(in); } break; } case 39: // rot if (!d->memptr->val) { d->memptr->val = clone_number(initial_values[mod(d,6)]); }else{ repair_number_after_xlat2(d->memptr->val); } rotate_r(d->memptr->val, rotwidth); copy_number(a,d->memptr->val); break; case 40: // movd if (!d->memptr->val) { copy_number(d,initial_values[mod(d,6)]); }else{ repair_number_after_xlat2(d->memptr->val); update_memptr(d->memptr->val,memory); copy_number(d, d->memptr->val); } update_memptr(d,memory); // check rotwidth if (d->width > max_wordwidth) { uintmax_t w = get_real_width(d); if (w > max_wordwidth) { max_wordwidth = w; if (det_growth) { rotwidth = det_growth_policy(max_wordwidth, rotwidth, growth_step, growth_slack); }else{ rotwidth = nondet_growth_policy(max_wordwidth, rotwidth, growth_prob, growth_slack); } } } break; case 62: // opr if (!d->memptr->val) { d->memptr->val = clone_number(initial_values[mod(d,6)]); }else{ repair_number_after_xlat2(d->memptr->val); } opr(a,d->memptr->val); break; case 81: // hlt return 0; case 68: default: // nop break; } xlat2(c->memptr->val); prev = c->memptr; increment(c); update_memptr(c,memory); if (!prev->next) { prev->next = c->memptr; } pos++; pos %= 564; prev = d->memptr; increment(d); update_memptr(d,memory); if (!prev->next) { prev->next = d->memptr; } step++; } }