Skip to content

Commit 0a612e5

Browse files
committed
make string length function use our helper code point function
1 parent bd02fc2 commit 0a612e5

File tree

4 files changed

+84
-20
lines changed

4 files changed

+84
-20
lines changed

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ SOURCES = \
3535
source_map.cpp \
3636
to_c.cpp \
3737
to_string.cpp \
38-
units.cpp
38+
units.cpp \
39+
utf8_string.cpp
3940

4041
OBJECTS = $(SOURCES:.cpp=.o)
4142

functions.cpp

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "to_string.hpp"
88
#include "inspect.hpp"
99
#include "eval.hpp"
10+
#include "utf8_string.hpp"
1011

1112
#include <cmath>
1213
#include <cctype>
@@ -695,7 +696,6 @@ namespace Sass {
695696
{
696697
String_Constant* s = ARG("$string", String_Constant);
697698
string str = s->value();
698-
size_t len = 0;
699699
size_t length_of_s = str.size();
700700
size_t i = 0;
701701

@@ -704,24 +704,8 @@ namespace Sass {
704704
--length_of_s;
705705
}
706706

707-
while (i < length_of_s) {
708-
unsigned char c = static_cast<unsigned char>(str[i]);
709-
if (c < 128) {
710-
// it's a single-byte character
711-
++len;
712-
++i;
713-
}
714-
// it's a multi bit sequence and presumably it's a leading bit
715-
else {
716-
++i; // go to the next byte
717-
// see if it's still part of the sequence
718-
while ((i < length_of_s) && ((static_cast<unsigned char>(str[i]) & 0b11000000) == 0b10000000)) {
719-
++i;
720-
}
721-
// when it's not [aka a new leading bit], increment and move on
722-
++len;
723-
}
724-
}
707+
size_t len = UTF_8::code_point_count(str, i, length_of_s);
708+
725709
return new (ctx.mem) Number(path, position, len);
726710
}
727711

utf8_string.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#ifndef SASS_UTF8_STRING
2+
#define SASS_UTF8_STRING
3+
4+
#include <string>
5+
6+
namespace Sass {
7+
namespace UTF_8 {
8+
using std::string;
9+
// class utf8_string {
10+
// string s_;
11+
// public:
12+
// utf8_string(const string &s): s_(s) {}
13+
// utf8_string(const char* c): s_(string(c)) {}
14+
15+
// char operator[](size_t i);
16+
// size_t length();
17+
// size_t byte_to_char(size_t i);
18+
// };
19+
20+
// Counts the UTF-8 code points stored in the byte range [start, end) of `str`.
//
//   str   - the UTF-8 encoded bytes to scan
//   start - byte offset of the first byte to examine
//   end   - byte offset one past the last byte to examine; a value larger
//           than str.size() is clamped to str.size() so the scan never
//           reads beyond the string
//
// Returns the number of code points found. A byte below 0x80 counts as one
// code point. Any other byte is taken to be the lead byte of a multi-byte
// sequence and counts once, together with every continuation byte (bit
// pattern 10xxxxxx) that directly follows it. The bytes are not validated as
// well-formed UTF-8. An empty or inverted range (start >= end) yields 0.
size_t code_point_count(const std::string& str, size_t start, size_t end) {
  // Guard against a caller-supplied end that lies past the last byte.
  const size_t limit = (end < str.size()) ? end : str.size();

  size_t len = 0;
  size_t i = start;

  while (i < limit) {
    const unsigned char lead = static_cast<unsigned char>(str[i]);
    ++i;  // step past the single byte / lead byte

    if (lead >= 0x80) {
      // Multi-byte sequence: swallow the continuation bytes (10xxxxxx) that
      // belong to it. The masks are written in hex rather than as 0b...
      // literals so the code also builds with pre-C++14 compilers.
      while (i < limit && (static_cast<unsigned char>(str[i]) & 0xC0) == 0x80) {
        ++i;
      }
    }

    // Exactly one code point was consumed in either case.
    ++len;
  }
  return len;
}
45+
46+
size_t code_point_count(const string& str) {
47+
return code_point_count(str, 0, str.length());
48+
}
49+
50+
}
51+
}
52+
53+
#endif

utf8_string.hpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#ifndef SASS_UTF8_STRING
2+
#define SASS_UTF8_STRING
3+
4+
#include <string>
5+
6+
namespace Sass {
7+
namespace UTF_8 {
8+
// class utf8_string {
9+
// string s_;
10+
// public:
11+
// utf8_string(const string &s): s_(s) {}
12+
// utf8_string(const char* c): s_(string(c)) {}
13+
14+
// char operator[](size_t i);
15+
// size_t length();
16+
// size_t byte_to_char(size_t i);
17+
// };
18+
19+
// Counts the number of code points (UTF-8 characters) in the bytes of `str`
// from byte offset `start` up to, but not including, byte offset `end`.
// The parameter type is spelled std::string so this header compiles without
// relying on a using-declaration supplied by some other include.
size_t code_point_count(const std::string& str, size_t start, size_t end);
// Counts the number of code points (UTF-8 characters) in the whole of `str`.
size_t code_point_count(const std::string& str);
22+
23+
}
24+
}
25+
26+
#endif

0 commit comments

Comments
 (0)