Browse code

Move utf8 helper functions to re_data. Add redata_op_addn().

Dario Rodriguez authored on 02/09/2020 22:07:13
Showing 5 changed files
... ...
@@ -2,7 +2,7 @@ CC=gcc
2 2
 CFLAGS=-g -Wall -Wformat-truncation=0 -Ideps -I/usr/include/SDL2
3 3
 LDFLAGS=-lSDL2 -lSDL2_image -lSDL2_ttf
4 4
 
5
-all: re tests tests_ui
5
+all: re tests tests_utf8
6 6
 
7 7
 recenteditor.o: recenteditor.c re_data.h ext/socklib.h
8 8
 
... ...
@@ -14,7 +14,7 @@ re_ui.o: re_ui.c re_ui.h hack_regular.h
14 14
 
15 15
 re_tests.o: re_tests.c re_data.h
16 16
 
17
-re_tests_ui.o: re_tests_ui.c re_ui.h
17
+re_tests_utf8.o: re_tests_utf8.c re_ui.h
18 18
 
19 19
 sha3.o: sha3/sha3.c sha3/sha3.h
20 20
 	$(CC) $(CFLAGS) -Isha3 -c -o sha3.o sha3/sha3.c
... ...
@@ -37,8 +37,8 @@ re: recenteditor.o re_data.o re_plugin_unsaved.o sha3.o re_ui.o hack_regular.o s
37 37
 tests: re_tests.o re_data.o sha3.o
38 38
 	$(CC) $(LDFLAGS) -o tests re_tests.o re_data.o sha3.o
39 39
 
40
-tests_ui: re_tests_ui.o re_ui.o hack_regular.o
41
-	$(CC) $(LDFLAGS) -o tests_ui re_tests_ui.o re_ui.o hack_regular.o
40
+tests_utf8: re_tests_utf8.o re_data.o sha3.o
41
+	$(CC) $(LDFLAGS) -o tests_utf8 re_tests_utf8.o re_data.o sha3.o
42 42
 
43 43
 clean:
44 44
 	rm -f recenteditor.o re_data.o re_tests.o re_plugin_unsaved.o sha3.o re_ui.o hack_regular.c hack_regular.h hack_regular.o re tests 
... ...
@@ -26,6 +26,7 @@
26 26
 //#define CHUNKSIZE 32768
27 27
 #define CHUNKSIZE 160
28 28
 #define UNDOBLOCK 1024
29
+#define ADDNBLOCK 1024
29 30
 #define UNDOGROWSIZE (256*1024)
30 31
 #define SECURESAVEPREFIX "."
31 32
 #define SECURESAVEPOSTFIX ".saving"
... ...
@@ -860,6 +861,28 @@ redata_op_add(redata_t *redata, long insertpos, char *buf, long buflen, undostac
860 861
         return(0);
861 862
 }
862 863
 
864
+int
865
+redata_op_addn(redata_t *redata, long pos, char character, long n, undostack_t *fromhere)
866
+{
867
+        if(redata==NULL || n<0)
868
+                return(-1); /* sanity check failed */
869
+        if(n==0)
870
+                return(0); /* nothing to do */
871
+        if(redata->sizeaddnbuf<n) {
872
+                char *newptr;
873
+                long newsize;
874
+                newsize=(n+1+ADDNBLOCK)/ADDNBLOCK;
875
+                newsize*=ADDNBLOCK;
876
+                if((newptr=realloc(redata->addnbuf,newsize))==NULL)
877
+                        return(-1); /* insuf. mem. */
878
+                redata->addnbuf=newptr;
879
+                redata->sizeaddnbuf=newsize;
880
+        }
881
+        memset(redata->addnbuf,character,n);
882
+        redata->addnbuf[n]='\0';
883
+        return(redata_op_add(redata, pos, redata->addnbuf, n, fromhere));
884
+}
885
+
863 886
 int
864 887
 redata_op_del(redata_t *redata, long delpos, long size, undostack_t *fromhere)
865 888
 {
... ...
@@ -1249,6 +1272,64 @@ redata_generic_genname(char *filename,char *prefix, char *postfix, char *buf, in
1249 1272
         return(name);
1250 1273
 }
1251 1274
 
1275
/*
 * redata_generic_utf8len: count the utf8 characters stored in the first
 * "size" bytes of "ptr".
 *
 * Only code points are counted: every byte that is not a continuation byte
 * (0x80-0xbf) starts a new character, i.e. single byte chars (0x00-0x7f)
 * and leading bytes (0xc0-0xff) add one, continuation bytes add nothing.
 *
 * Returns the character count, or -1 when size is negative or when ptr is
 * NULL with a non-zero size.
 *
 * TODO: support combining code points (at least U+0300 - U+036F,
 *       https://en.wikipedia.org/wiki/Combining_character )
 * TODO: also consider tabs
 */
int
redata_generic_utf8len(char *ptr, int size)
{
        int count;
        char *cur,*end;
        if(size<0)
                return(-1); /* sanity check failed */
        if(size==0)
                return(0); /* empty buffer (ptr may be NULL here) */
        if(ptr==NULL)
                return(-1); /* sanity check failed */
        count=0;
        for(cur=ptr,end=ptr+size;cur<end;cur++) {
                if((*cur&0xc0)==0x80)
                        continue; /* continuation byte: same character */
                count++;
        }
        return(count);
}
1292
+
1293
/*
 * redata_generic_utf8col: return a pointer to the "col"th (zero based)
 * utf8 character inside the first "size" bytes of "ptr".
 *
 * Characters are delimited the same way redata_generic_utf8len() counts
 * them: every byte outside 0x80-0xbf (single byte char or leading byte)
 * starts a character and continuation bytes belong to the previous one.
 * A col of zero or less yields the first character start in the buffer.
 *
 * Returns NULL when size is negative, when ptr is NULL with a non-zero
 * size, or when the buffer has no character at that column.
 *
 * TODO: support combining code points (at least U+0300 - U+036F,
 *       https://en.wikipedia.org/wiki/Combining_character )
 * TODO: also consider tabs
 */
char *
redata_generic_utf8col(char *ptr, int size, int col)
{
        int off,seen;
        if(size<0 || (size!=0 && ptr==NULL))
                return(NULL); /* sanity check failed */
        if(col>=size)
                return(NULL); /* col greater than maximum possible char. count */
        /* consume bytes until "col" character starts have been passed */
        off=0;
        seen=0;
        while(seen<col && off<size) {
                if((ptr[off]&0xc0)!=0x80)
                        seen++;
                off++;
        }
        /* step over the continuation bytes of the last consumed character;
         * the first non-continuation byte found is the requested one */
        for(;off<size;off++) {
                if((ptr[off]&0xc0)!=0x80)
                        return(ptr+off);
        }
        return(NULL); /* col is beyond end of string */
}
1315
+
1316
/*
 * redata_generic_utf8charlen: length in bytes of the utf8 character that
 * starts at "ptr", looking at no more than "maxsize" bytes.
 *
 * A byte below 0x80 is a one byte character. Otherwise the length is the
 * leading byte plus every continuation byte (0x80-0xbf) that follows it
 * within maxsize; the count is not checked against the length announced by
 * the leading byte.
 *
 * Returns 0 on error: ptr is NULL, maxsize is less than 1, or ptr points to
 * a continuation byte instead of the start of a character.
 *
 * TODO: support combining code points (at least U+0300 - U+036F,
 *       https://en.wikipedia.org/wiki/Combining_character )
 */
int
redata_generic_utf8charlen(char *ptr, int maxsize)
{
        unsigned char *bytes;
        int nbytes;
        if(ptr==NULL || maxsize<1)
                return(0); /* sanity check failed */
        bytes=(unsigned char *)ptr;
        if((bytes[0]&0x80)==0)
                return(1); /* single byte char */
        if(bytes[0]<0xc0)
                return(0); /* error: this is continuation, not leading byte */
        /* leading byte: add the continuation bytes that follow it */
        nbytes=1;
        while(nbytes<maxsize && bytes[nbytes]>=0x80 && bytes[nbytes]<0xc0)
                nbytes++;
        return(nbytes);
}
1332
+
1252 1333
 
1253 1334
 static int
1254 1335
 redata_hash_gen(redata_t *redata, char *filename, char *buf, long buflen, char *resbuf129bytes)
... ...
@@ -68,6 +68,8 @@ typedef struct redata_t {
68 68
         rechunk_t **chunks;
69 69
         long available;
70 70
         rechunk_t *tmpchunk;
71
+        long sizeaddnbuf;
72
+        char *addnbuf;
71 73
         /* undo */
72 74
         undostack_t undostack;
73 75
         undostack_t redostack;
... ...
@@ -113,6 +115,7 @@ int redata_load(redata_t *redata, char *filename, int (*callback_question)(/*cha
113 115
 int redata_save(redata_t *redata, char *filename);
114 116
 
115 117
 int redata_op_add(redata_t *redata, long pos, char *buf, long buflen, undostack_t *fromhere);
118
+int redata_op_addn(redata_t *redata, long pos, char character, long n, undostack_t *fromhere);
116 119
 int redata_op_del(redata_t *redata, long pos, long size, undostack_t *fromhere);
117 120
 int redata_op_move(redata_t *redata, long posorig, long size, long posdest, undostack_t *fromhere);
118 121
 int redata_op_undo(redata_t *redata);
... ...
@@ -126,7 +129,14 @@ int redata_hash(redata_t *redata, char *resbuf129bytes);
126 129
 int redata_filehash(redata_t *redata, char *filename, char *resbuf129bytes);
127 130
 int redata_memhash(redata_t *redata, char *buf, long buflen, char *resbuf129bytes);
128 131
 undostack_t *redata_getstack(redata_t *redata, undo_t *undo);
132
+
129 133
 char *redata_generic_genname(char *filename,char *prefix, char *postfix, char *buf, int bufsize);
130 134
 
135
+int redata_generic_utf8len(char *ptr, int size);
136
+char *redata_generic_utf8col(char *ptr, int size, int col);
137
+int redata_generic_utf8charlen(char *ptr, int maxsize);
138
+
139
+
131 140
 /* line convenience functions */
132 141
 int redata_line_info(redata_t *redata, long pos, long *startpos, char **startptr, int *len);
142
+
133 143
deleted file mode 100644
... ...
@@ -1,150 +0,0 @@
1
-/*
2
- * re_tests_ui.c
3
- *
4
- * A programmers editor
5
- *
6
- * Tests (ensure correct functionality of modules)
7
- *
8
- * Author: Dario Rodriguez dario@softhome.net
9
- * This program is licensed under the terms of GNU GPL v2.1+
10
- */
11
-
12
-#include <stdio.h>
13
-#include <stdlib.h>
14
-#include <unistd.h>
15
-#include <string.h>
16
-#include <limits.h>
17
-#include <fcntl.h>
18
-#include <errno.h>
19
-
20
-#include "re_ui.h"
21
-
22
-#define TEST_OK "OK"
23
-
24
-typedef struct test_ui_t  {
25
-        int needs_realui;
26
-        char *name;
27
-        char *(*fn)(reui_t *,char *,int , int, int, int);
28
-} test_ui_t;
29
-
30
-char *test_utf8len(reui_t *ui, char *teststring, int expectednchars, int dummy1, int dummy2,int dummy3);
31
-char *test_utf8col(reui_t *ui, char *teststring, int dummy0, int col, int expectedoffset,int dummy3);
32
-char *test_utf8charlen(reui_t *ui, char *teststring, int dummy0, int dummy1, int offset,int charsizeatoffset);
33
-
34
-int
35
-main(int argc, char *argv[])
36
-{
37
-        struct reui_t dummyui;
38
-        struct {
39
-                char string[1024];
40
-                int nchars;
41
-                int col;
42
-                int offsetcol;
43
-                int charsizeatoffsetcol;
44
-        } teststrings[]={
45
-                {{"This is a latin1 string"},51-28,1,1,1},
46
-                {{"lowercase acute vowels:\xc3\xa1\xc3\xa9\xc3\xad\xc3\xb3\xc3\xba"},23+5,23+1,23+2,2},
47
-        };
48
-        test_ui_t tests[]={
49
-                {0,"utf8len",test_utf8len},
50
-                {0,"utf8col",test_utf8col},
51
-                {0,"utf8charlen",test_utf8charlen},
52
-        };
53
-        int flag_exit=0,flag_all=0;
54
-        reui_t *ui;
55
-        int i,s;
56
-        int nerrors,total;
57
-        char *res;
58
-        for(i=1;i<argc;i++) {
59
-                if(strcmp(argv[i],"--help")==0) {
60
-                        fprintf(stderr,"Syntax: %s [--all] [--exit] [--help]\nExplanation:\n\t--all: do even the slow tests\n\t--exit: exit program at first unsuccessful test\n\t--help: this text\n",argv[0]);
61
-                        return(1);
62
-                } else if(strcmp(argv[i],"--all")==0) {
63
-                        flag_all=1;
64
-                } else if(strcmp(argv[i],"--exit")==0) {
65
-                        flag_exit=1;
66
-                }
67
-        }
68
-        nerrors=0;
69
-        total=0;
70
-        /* flag_all is not used right now, next line is to silence the compiler */
71
-        total+=(flag_all&0);
72
-        /* end of hack */
73
-        memset(&dummyui,0,sizeof(reui_t));
74
-        for(s=0;s<(sizeof(teststrings)/sizeof(teststrings[0]));s++) {
75
-                fprintf(stderr,"\"%s\"\n",teststrings[s].string);
76
-                for(i=0;i<(sizeof(tests)/sizeof(tests[0]));i++,total++) {
77
-                        ui=&dummyui;
78
-                        if(tests[i].needs_realui && (ui=reui_init(NULL))==NULL) {
79
-                                fprintf(stderr,"ERROR: problem initializing ui module\n");
80
-                                return(1);
81
-                        }
82
-                        fprintf(stderr,"%i:%s...",total+1,tests[i].name);
83
-                        res=tests[i].fn(ui,teststrings[s].string,teststrings[s].nchars,teststrings[s].col,teststrings[s].offsetcol,teststrings[s].charsizeatoffsetcol);
84
-                                if(strcmp(res,TEST_OK)==0) {
85
-                                fprintf(stderr," ok.\n");
86
-                        } else {
87
-                                fprintf(stderr," ERROR: %s <= %s(\"%s\",%i,%i,%i,%i)\n",res,tests[i].name,teststrings[s].string,teststrings[s].nchars,teststrings[s].col,teststrings[s].offsetcol,teststrings[s].charsizeatoffsetcol);
88
-                                nerrors++;
89
-                                if(flag_exit) {
90
-                                        /* exit on first error */
91
-                                        s=sizeof(teststrings)/sizeof(teststrings[0]);
92
-                                        break;
93
-                                }
94
-                        }
95
-                        if(tests[i].needs_realui)
96
-                                reui_free(ui),ui=NULL;
97
-                }
98
-        }
99
-        fprintf(stderr,"\n");
100
-        if(nerrors==0)
101
-                fprintf(stderr,"All %i tests passed OK\n",total);
102
-        else
103
-                fprintf(stderr,"%i test(s) failed of %i tests run.\n",nerrors,total);
104
-        if(ui!=NULL && ui!=&dummyui)
105
-                reui_free(ui),ui=NULL;
106
-        return((nerrors==0)?0:1);
107
-}
108
-
109
-char *
110
-test_utf8len(reui_t *ui, char *teststring, int expectednchars, int dummy1, int dummy2, int dummy3)
111
-{
112
-        int res;
113
-        static char errorstr[1024];
114
-        res=reui_utf8len(ui,teststring,strlen(teststring));
115
-        if(res!=expectednchars) {
116
-                snprintf(errorstr,sizeof(errorstr),"expected %i chars, got %i chars",expectednchars,res);
117
-                errorstr[sizeof(errorstr)-1]='\0';
118
-                return(errorstr);
119
-        }
120
-        return(TEST_OK);
121
-}
122
-
123
-char *test_utf8col(reui_t *ui, char *teststring, int dummy0, int col, int expectedoffset, int dummy3)
124
-{
125
-        char *ptr;
126
-        static char errorstr[1024];
127
-        ptr=reui_utf8col(ui,teststring,strlen(teststring),col);
128
-        if(ptr!=(teststring+expectedoffset)) {
129
-                snprintf(errorstr,sizeof(errorstr),"expected offset %i, got offset %i (\"%s\")",expectedoffset,(int) ((ptr==NULL)?-1:ptr-teststring),ptr);
130
-                errorstr[sizeof(errorstr)-1]='\0';
131
-                return(errorstr);
132
-        }
133
-        return(TEST_OK);
134
-}
135
-
136
-char *
137
-test_utf8charlen(reui_t *ui, char *teststring, int dummy0, int dummy1, int offset,int charsizeatoffset)
138
-{
139
-        int res;
140
-        static char errorstr[1024];
141
-        res=reui_utf8charlen(ui,teststring+offset,strlen(teststring+offset));
142
-        if(res!=(charsizeatoffset)) {
143
-                snprintf(errorstr,sizeof(errorstr),"expected char size %i, got char size %i (\"%s\")",charsizeatoffset,res,teststring+offset);
144
-                errorstr[sizeof(errorstr)-1]='\0';
145
-                return(errorstr);
146
-        }
147
-        return(TEST_OK);
148
-}
149
-
150
-
... ...
@@ -219,11 +219,11 @@ re_processkey(re_t *re, SDL_Event *event)
219 219
                         return(-1); /* couldn't get current line data */
220 220
                 if(event->key.keysym.sym==SDLK_UP && newpos==0)
221 221
                         return(-1); /* going up but already at top */
222
-                oldcol=reui_utf8len(re->ui,ptr,re->cursorpos-newpos);
222
+                oldcol=redata_generic_utf8len(ptr,re->cursorpos-newpos);
223 223
                 if(redata_line_info(re->data,(event->key.keysym.sym==SDLK_DOWN)?(newpos+len):(newpos-1),&newpos2,&ptr,&len)==-1)
224 224
                         return(-1); /* couldn't get next line data */
225 225
                 has_nl=((len>0 && ptr[len-1]=='\n')?1:0);
226
-                ptr2=reui_utf8col(re->ui,ptr,len-has_nl,oldcol);
226
+                ptr2=redata_generic_utf8col(ptr,len-has_nl,oldcol);
227 227
                 if(ptr2!=NULL)
228 228
                         re->cursorpos=newpos2+(ptr2-ptr);
229 229
                 else
... ...
@@ -239,12 +239,12 @@ re_processkey(re_t *re, SDL_Event *event)
239 239
                         return(-1); /* couldn't get current line data */
240 240
                 if(event->key.keysym.sym==SDLK_LEFT && re->cursorpos==0)
241 241
                         return(-1); /* going left but already at leftmost char */
242
-                linecols=reui_utf8len(re->ui,ptr,len);
243
-                oldcol=reui_utf8len(re->ui,ptr,re->cursorpos-newpos);
242
+                linecols=redata_generic_utf8len(ptr,len);
243
+                oldcol=redata_generic_utf8len(ptr,re->cursorpos-newpos);
244 244
                 inc=(event->key.keysym.sym==SDLK_LEFT)?-1:1;
245 245
                 has_nl=((len>0 && ptr[len-1]=='\n')?1:0);
246 246
                 if(re->lastcol<=linecols) {
247
-                        ptr2=reui_utf8col(re->ui,ptr,len,re->lastcol+inc);
247
+                        ptr2=redata_generic_utf8col(ptr,len,re->lastcol+inc);
248 248
                 } else
249 249
                         ptr2=NULL;
250 250
                 if(ptr2!=NULL)
... ...
@@ -255,6 +255,8 @@ re_processkey(re_t *re, SDL_Event *event)
255 255
                         re->lastcol+=inc;
256 256
                 re_drawheader(re);
257 257
                 re->contentsdirty=1;
258
+        } else if(event->key.keysym.sym==SDLK_a) {
259
+                ;
258 260
         }
259 261
         return(-1);
260 262
 }
... ...
@@ -300,12 +302,12 @@ re_drawcontents(re_t *re)
300 302
                 has_nl=((len>0 && ptr[len-1]=='\n')?1:0);
301 303
                 reui_write(re->ui,re->x,y,"\x00\x00\x00\xff",(char *)ptr,len-has_nl);
302 304
                 if(row==re->lastrow) {
303
-     #warning DEBUG write of current char
305
+#warning DEBUG write of current char
304 306
                         reui_fill(re->ui,re->x+re->ui->fontwidth*re->lastcol,y,re->ui->fontwidth,re->ui->fontheight+1,"\x00\x00\x00\xff");
305 307
 #warning TODO: consider tabs
306
-                        curptr=reui_utf8col(re->ui,ptr,len-has_nl,re->lastcol);
308
+                        curptr=redata_generic_utf8col(ptr,len-has_nl,re->lastcol);
307 309
                         curptrlen=(curptr==NULL)?0:(len-has_nl)-(curptr-ptr);
308
-                        reui_write(re->ui,re->x+re->ui->fontwidth*re->lastcol,y,"\xff\xff\xff\xff",curptr,reui_utf8charlen(re->ui,ptr,curptrlen));
310
+                        reui_write(re->ui,re->x+re->ui->fontwidth*re->lastcol,y,"\xff\xff\xff\xff",curptr,redata_generic_utf8charlen(ptr,curptrlen));
309 311
 #warning TODO: if it is one of  '[','{','<','>','}',']', highlight the matching bracket/parens/anglebracket.
310 312
                 }
311 313
                 pos=newpos+len;