Browse code

Add a function to extract the UTF-8 character that precedes a given position. Add a function to extract a substring from a redata buffer.

Dario Rodriguez authored on 08/09/2022 21:57:42
Showing 2 changed files
... ...
@@ -1716,6 +1716,76 @@ redata_getutf8char(redata_t *redata, long pos, char *buf, int len, int *usedbuf)
1716 1716
         return(0);
1717 1717
 }
1718 1718
 
1719
+int
1720
+redata_getprevutf8char(redata_t *redata, long pos, char *buf, int len, int *usedbuf)
1721
+{
1722
+        int numchunk;
1723
+        int offset;
1724
+        int ooff;
1725
+        int c;
1726
+        int n;
1727
+        char tmpchar;
1728
+        if(redata==NULL || pos<=0 || pos>redata_getsize(redata) || buf==NULL || len<1 || usedbuf==NULL)
1729
+                return(-1); /* sanity check failed */
1730
+        if(redata_getposptr(redata,pos,&numchunk,&offset)!=0)
1731
+                return(-1); /* couldn't get pos */
1732
+        while(numchunk>0 && offset==0) {
1733
+                numchunk--;
1734
+                offset=redata->chunks[numchunk]->useddata;
1735
+        }
1736
+        if(offset==0)
1737
+                return(-1); /* at start, no data */
1738
+        ooff=0;
1739
+        c=redata->chunks[numchunk]->data[offset-1];
1740
+        offset--;
1741
+        ((unsigned char *)buf)[ooff++]=c;
1742
+        while(ooff<len && (!UTF8_IS_ASCII(c) || UTF8_IS_MULTIBYTECONT(c))) {
1743
+                while(numchunk>0 && offset==0) {
1744
+                        numchunk--;
1745
+                        offset=redata->chunks[numchunk]->useddata;
1746
+                }
1747
+                if(numchunk==0 && offset==0)
1748
+                        break;
1749
+                c=redata->chunks[numchunk]->data[offset-1];
1750
+                offset--;
1751
+                ((unsigned char *)buf)[ooff++]=c;
1752
+        }
1753
+        *usedbuf=ooff;
1754
+        /* invert the data in buf */
1755
+        for(n=0;(ooff-1-n)>n;n++) {
1756
+                tmpchar=buf[n];
1757
+                buf[n]=buf[ooff-1-n];
1758
+                buf[ooff-1-n]=tmpchar;
1759
+        }
1760
+        return(0);
1761
+}
1762
+
1763
+int
1764
+redata_getsubstr(redata_t *redata, long posini, long posend, char *buf, int len, int *usedbuf)
1765
+{
1766
+        int numchunk;
1767
+        int offset;
1768
+        int ooff;
1769
+        long pos;
1770
+        if(redata==NULL || posini<=0 || posini>redata_getsize(redata) || posend<=0 || posend>redata_getsize(redata) || buf==NULL || len<1 || usedbuf==NULL)
1771
+                return(-1); /* sanity check failed */
1772
+        if(redata_getposptr(redata,posini,&numchunk,&offset)!=0)
1773
+                return(-1); /* couldn't get pos */
1774
+        ooff=0;
1775
+        pos=posini;
1776
+        while(ooff<len && pos<posend) {
1777
+                while(numchunk<redata->sizechunks && redata->chunks[numchunk]!=NULL && offset>=redata->chunks[numchunk]->useddata) {
1778
+                        numchunk++;
1779
+                        offset=0;
1780
+                }
1781
+                if(numchunk>=redata->sizechunks)
1782
+                        return(-1); /* at end; no data */
1783
+                buf[ooff++]=redata->chunks[numchunk]->data[offset++];
1784
+                pos++;
1785
+        }
1786
+        *usedbuf=ooff;
1787
+        return(0);
1788
+}
1719 1789
 
1720 1790
 int
1721 1791
 redata_generic_utf8len(char *ptr, int size)
... ...
@@ -174,6 +174,8 @@ long redata_searchbackwards(redata_t *redata, long posini, char *str, int len);
174 174
 int redata_memcmp(redata_t *redata, long pos, char *str, int len);
175 175
 
176 176
 int redata_getutf8char(redata_t *redata, long pos, char *buf, int len, int *usedbuf);
177
+int redata_getprevutf8char(redata_t *redata, long pos, char *buf, int len, int *usedbuf);
178
+int redata_getsubstr(redata_t *redata, long posini, long posend, char *buf, int len, int *usedbuf);
177 179
 
178 180
 /* utf8 convenience functions */
179 181