Skip to content

Commit 9673553

Browse files
author
Aaron Leung
committed
Merge pull request #371 from wonja/str_functions
yay more stuff Remaining string functions
2 parents bd02fc2 + c8e674c commit 9673553

File tree

6 files changed

+296
-21
lines changed

6 files changed

+296
-21
lines changed

Makefile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
CXX ?= g++
2-
CXXFLAGS = -Wall -O2 -fPIC -g
2+
CXXFLAGS = -Wall -fPIC -g
33
LDFLAGS = -fPIC
44

55
PREFIX = /usr/local
@@ -35,7 +35,8 @@ SOURCES = \
3535
source_map.cpp \
3636
to_c.cpp \
3737
to_string.cpp \
38-
units.cpp
38+
units.cpp \
39+
utf8_string.cpp
3940

4041
OBJECTS = $(SOURCES:.cpp=.o)
4142

context.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,11 @@ namespace Sass {
362362
register_function(ctx, unquote_sig, sass_unquote, env);
363363
register_function(ctx, quote_sig, sass_quote, env);
364364
register_function(ctx, str_length_sig, str_length, env);
365+
register_function(ctx, str_insert_sig, str_insert, env);
366+
register_function(ctx, str_index_sig, str_index, env);
367+
register_function(ctx, str_slice_sig, str_slice, env);
368+
register_function(ctx, to_upper_case_sig, to_upper_case, env);
369+
register_function(ctx, to_lower_case_sig, to_lower_case, env);
365370
// Number Functions
366371
register_function(ctx, percentage_sig, percentage, env);
367372
register_function(ctx, round_sig, round, env);

functions.cpp

Lines changed: 120 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "to_string.hpp"
88
#include "inspect.hpp"
99
#include "eval.hpp"
10+
#include "utf8_string.hpp"
1011

1112
#include <cmath>
1213
#include <cctype>
@@ -695,7 +696,6 @@ namespace Sass {
695696
{
696697
String_Constant* s = ARG("$string", String_Constant);
697698
string str = s->value();
698-
size_t len = 0;
699699
size_t length_of_s = str.size();
700700
size_t i = 0;
701701

@@ -704,27 +704,128 @@ namespace Sass {
704704
--length_of_s;
705705
}
706706

707-
while (i < length_of_s) {
708-
unsigned char c = static_cast<unsigned char>(str[i]);
709-
if (c < 128) {
710-
// it's a single-byte character
711-
++len;
712-
++i;
713-
}
714-
// it's a multi bit sequence and presumably it's a leading bit
715-
else {
716-
++i; // go to the next byte
717-
// see if it's still part of the sequence
718-
while ((i < length_of_s) && ((static_cast<unsigned char>(str[i]) & 0b11000000) == 0b10000000)) {
719-
++i;
720-
}
721-
// when it's not [aka a new leading bit], increment and move on
722-
++len;
723-
}
724-
}
707+
size_t len = UTF_8::code_point_count(str, i, length_of_s);
708+
725709
return new (ctx.mem) Number(path, position, len);
726710
}
727711

712+
Signature str_insert_sig = "str-insert($string, $insert, $index)";
713+
BUILT_IN(str_insert)
714+
{
715+
String_Constant* s = ARG("$string", String_Constant);
716+
string str = s->value();
717+
char quotemark = s->quote_mark();
718+
str = unquote(str);
719+
String_Constant* i = ARG("$insert", String_Constant);
720+
string ins = i->value();
721+
ins = unquote(ins);
722+
Number* ind = ARG("$index", Number);
723+
double index = ind->value();
724+
size_t len = UTF_8::code_point_count(str, 0, str.size());
725+
726+
if (index > 0 && index <= len) {
727+
// positive and within string length
728+
str.insert(UTF_8::code_point_offset_to_byte_offset(str, index-1), ins);
729+
}
730+
else if (index > len) {
731+
// positive and past string length
732+
str += ins;
733+
}
734+
else if (index == 0) {
735+
str = ins + str;
736+
}
737+
else if (std::abs(index) <= len) {
738+
// negative and within string length
739+
index += len + 1;
740+
str.insert(UTF_8::code_point_offset_to_byte_offset(str, index), ins);
741+
}
742+
else {
743+
// negative and past string length
744+
str = ins + str;
745+
}
746+
747+
if (quotemark) {
748+
str = quote(str, quotemark);
749+
}
750+
751+
return new (ctx.mem) String_Constant(path, position, str);
752+
753+
}
754+
755+
Signature str_index_sig = "str-index($string, $substring)";
756+
BUILT_IN(str_index)
757+
{
758+
String_Constant* s = ARG("$string", String_Constant);
759+
String_Constant* t = ARG("$substring", String_Constant);
760+
string str = s->value();
761+
str = unquote(str);
762+
string substr = t->value();
763+
substr = unquote(substr);
764+
765+
size_t c_index = str.find(substr);
766+
if(c_index == string::npos) {
767+
return new (ctx.mem) Null(path, position);
768+
}
769+
size_t index = UTF_8::code_point_count(str, 0, c_index + 1);
770+
771+
return new (ctx.mem) Number(path, position, index);
772+
}
773+
774+
Signature str_slice_sig = "str-slice($string, $start-at, $end-at:-1)";
775+
BUILT_IN(str_slice)
776+
{
777+
String_Constant* s = ARG("$string", String_Constant);
778+
Number* n = ARG("$start-at", Number);
779+
Number* m = ARG("$end-at", Number);
780+
781+
string str = s->value();
782+
char quotemark = s->quote_mark();
783+
str = unquote(str);
784+
785+
// normalize into 0-based indices
786+
size_t start = UTF_8::code_point_offset_to_byte_offset(str, UTF_8::normalize_index(n->value(), UTF_8::code_point_count(str)));
787+
size_t end = UTF_8::code_point_offset_to_byte_offset(str, UTF_8::normalize_index(m->value(), UTF_8::code_point_count(str)));
788+
789+
string newstr;
790+
if(start - end == 0) {
791+
newstr = str.substr(start, end - start);
792+
} else {
793+
newstr = str.substr(start, end - start + UTF_8::length_of_code_point_at(str, end));
794+
}
795+
if(quotemark) {
796+
newstr = quote(newstr, quotemark);
797+
}
798+
799+
return new (ctx.mem) String_Constant(path, position, newstr);
800+
801+
}
802+
803+
Signature to_upper_case_sig = "to-upper-case($string)";
804+
BUILT_IN(to_upper_case)
805+
{
806+
String_Constant* s = ARG("$string", String_Constant);
807+
string str = s->value();
808+
809+
for (size_t i = 0, L = str.length(); i < L; ++i) {
810+
str[i] = std::toupper(str[i]);
811+
}
812+
813+
return new (ctx.mem) String_Constant(path, position, str);
814+
}
815+
816+
Signature to_lower_case_sig = "to-lower-case($string)";
817+
BUILT_IN(to_lower_case)
818+
{
819+
String_Constant* s = ARG("$string", String_Constant);
820+
string str = s->value();
821+
822+
for (size_t i = 0, L = str.length(); i < L; ++i) {
823+
str[i] = std::tolower(str[i]);
824+
}
825+
826+
return new (ctx.mem) String_Constant(path, position, str);
827+
}
828+
728829
///////////////////
729830
// NUMBER FUNCTIONS
730831
///////////////////

functions.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@ namespace Sass {
6565
extern Signature unquote_sig;
6666
extern Signature quote_sig;
6767
extern Signature str_length_sig;
68+
extern Signature str_insert_sig;
69+
extern Signature str_index_sig;
70+
extern Signature str_slice_sig;
71+
extern Signature to_upper_case_sig;
72+
extern Signature to_lower_case_sig;
6873
extern Signature percentage_sig;
6974
extern Signature round_sig;
7075
extern Signature ceil_sig;
@@ -117,6 +122,11 @@ namespace Sass {
117122
BUILT_IN(sass_unquote);
118123
BUILT_IN(sass_quote);
119124
BUILT_IN(str_length);
125+
BUILT_IN(str_insert);
126+
BUILT_IN(str_index);
127+
BUILT_IN(str_slice);
128+
BUILT_IN(to_upper_case);
129+
BUILT_IN(to_lower_case);
120130
BUILT_IN(percentage);
121131
BUILT_IN(round);
122132
BUILT_IN(ceil);

utf8_string.cpp

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#ifndef SASS_UTF8_STRING
2+
#define SASS_UTF8_STRING
3+
4+
#include <string>
5+
6+
namespace Sass {
7+
namespace UTF_8 {
8+
using std::string;
9+
// class utf8_string {
10+
// string s_;
11+
// public:
12+
// utf8_string(const string &s): s_(s) {}
13+
// utf8_string(const char* c): s_(string(c)) {}
14+
15+
// char operator[](size_t i);
16+
// size_t length();
17+
// size_t byte_to_char(size_t i);
18+
// };
19+
20+
// Counts the code points (UTF-8 characters) in the byte range [start, end)
// of str. A byte below 0x80 is one code point. Any other byte opens a
// sequence that absorbs the continuation bytes (10xxxxxx) directly after it,
// and the whole sequence counts as one code point.
size_t code_point_count(const string& str, size_t start, size_t end) {
  size_t total = 0;
  // true while the previous byte belonged to a multi-byte sequence
  bool inside_sequence = false;

  for (size_t pos = start; pos < end; ++pos) {
    const unsigned char byte = static_cast<unsigned char>(str[pos]);
    const bool is_continuation = (byte & 0xC0) == 0x80;

    // A continuation byte extends the open sequence; anything else
    // (including a stray continuation byte) starts a new code point.
    if (!(inside_sequence && is_continuation)) {
      ++total;
    }
    inside_sequence = (byte >= 0x80);
  }
  return total;
}

// Counts the code points in the whole of str.
size_t code_point_count(const string& str) {
  return code_point_count(str, 0, str.length());
}
49+
50+
// Returns the byte offset at which the code point with 0-based index
// `offset` starts in str. If str holds fewer than `offset` code points, the
// walk stops at the end of the string and str.length() is returned, so the
// result is always a valid position for insert() and substr().
size_t code_point_offset_to_byte_offset(const string& str, size_t offset) {
  const size_t size = str.length();
  size_t i = 0;
  size_t len = 0;

  // The i < size bound keeps an oversized offset from reading past the buffer.
  while (len < offset && i < size) {
    const unsigned char c = static_cast<unsigned char>(str[i]);
    ++i; // step over the single byte, or the leading byte of a sequence
    if (c >= 128) {
      // skip the continuation bytes (10xxxxxx) of the multi-byte sequence
      while ((i < size) && ((static_cast<unsigned char>(str[i]) & 0xC0) == 0x80)) {
        ++i;
      }
    }
    ++len;
  }
  return i;
}
75+
76+
// Returns the number of bytes occupied by the code point that starts at byte
// position pos in str: 1 for a byte below 0x80, otherwise the leading byte
// plus the continuation bytes (10xxxxxx) that follow it. Returns 0 when pos
// is at or past the end of the string, because there is no code point there.
size_t length_of_code_point_at(const string& str, size_t pos) {
  const size_t size = str.length();
  if (pos >= size) {
    return 0;
  }

  size_t next = pos + 1; // first byte after the leading byte
  if (static_cast<unsigned char>(str[pos]) >= 128) {
    // Bound the scan by the byte position, not by the running byte count.
    while ((next < size) && ((static_cast<unsigned char>(str[next]) & 0xC0) == 0x80)) {
      ++next;
    }
  }
  return next - pos;
}
93+
94+
// Converts a 1-based, possibly negative or out-of-range index into a 0-based
// index for a string of len code points:
//   1..len      ->  index - 1
//   > len       ->  len      (one past the last code point)
//   0           ->  0
//   -len..-1    ->  len + index   (counted from the end)
//   < -len      ->  0
size_t normalize_index(int index, size_t len) {
  // Widen both operands so the comparisons cannot overflow. No abs() is
  // needed, so INT_MIN is safe and no extra header is required.
  const long long idx = index;
  const long long n = static_cast<long long>(len);

  if (idx > n) {
    // positive and past string length
    return len;
  }
  if (idx > 0) {
    // positive and within string length
    return static_cast<size_t>(idx - 1);
  }
  if (idx < 0 && idx >= -n) {
    // negative and within string length
    return static_cast<size_t>(idx + n);
  }
  // zero, or negative and past string length
  return 0;
}
119+
120+
}
121+
}
122+
123+
#endif

utf8_string.hpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#ifndef SASS_UTF8_STRING
2+
#define SASS_UTF8_STRING
3+
4+
#include <string>
5+
6+
namespace Sass {
  namespace UTF_8 {
    // class utf8_string {
    //   string s_;
    // public:
    //   utf8_string(const string &s): s_(s) {}
    //   utf8_string(const char* c): s_(string(c)) {}

    //   char operator[](size_t i);
    //   size_t length();
    //   size_t byte_to_char(size_t i);
    // };

    // The declarations spell out std::string so that this header compiles no
    // matter which headers were included before it.

    // function that will count the number of code points (utf-8 characters)
    // in the byte range from the given beginning to the given end
    size_t code_point_count(const std::string& str, size_t start, size_t end);
    // same, over the whole string
    size_t code_point_count(const std::string& str);

    // function that will return the byte offset of a code point in a string,
    // given the 0-based offset of that code point
    size_t code_point_offset_to_byte_offset(const std::string& str, size_t offset);

    // function that returns number of bytes in a character in a string
    size_t length_of_code_point_at(const std::string& str, size_t pos);

    // function that will return a normalized (0-based) index, given a crazy
    // (1-based, possibly negative or out-of-range) one
    size_t normalize_index(int index, size_t len);

  }
}
34+
35+
#endif

0 commit comments

Comments
 (0)