@@ -83,6 +83,70 @@ using json = nlohmann::json;
83
83
84
84
json conMeta;
85
85
86
+
87
+ /* *
88
+ * Helps keep user prompt and chat-hs-template tag parts seperate, but in sequence
89
+ */
90
+ class ChatParts {
91
+
92
+ std::vector<std::string> parts = {};
93
+ std::string types = {" " };
94
+
95
+ public:
96
+ // Identify string with special tokens that need to be processed.
97
+ static const auto S = ' s' ;
98
+ // Identify string which shouldnt have special token processing done.
99
+ static const auto N = ' n' ;
100
+ // Identify no string condition and or ignore string.
101
+ static const auto X = ' ?' ;
102
+
103
+ ChatParts () :parts{}, types{" " } {}
104
+
105
+ char last_type () {
106
+ if (types.length () == 0 ) {
107
+ return ChatParts::X;
108
+ }
109
+ return types[types.length ()-1 ];
110
+ }
111
+
112
+ void add_part (char type, const std::string &part) {
113
+ if (last_type () == type) {
114
+ parts[parts.size ()-1 ] += part;
115
+ } else {
116
+ parts.emplace_back (part);
117
+ types += type;
118
+ }
119
+ }
120
+
121
+ std::string str () {
122
+ std::string allin = " " ;
123
+ for (auto part: parts) {
124
+ allin += part;
125
+ }
126
+ return allin;
127
+ }
128
+
129
+ std::string name () {
130
+ return typeid (*this ).name ();
131
+ }
132
+
133
+ void dump () {
134
+ std::string me = name () + " :" + __func__;
135
+ LOGXLN (" INFO:%s:NumTypes:%zu" , me.c_str (), types.length ());
136
+ LOGXLN (" INFO:%s:NumParts:%zu" , me.c_str (), parts.size ());
137
+ LOGXLN (" INFO:%s:StrLength:%zu" , me.c_str (), str ().length ());
138
+ if (parts.size () != types.length ()) {
139
+ LOG_TEELN (" DBUG:%s:Mismatch between parts and types" , me.c_str ());
140
+ }
141
+ int i = 0 ;
142
+ for (auto part: parts) {
143
+ LOGXLN (" INFO:%s:%c:%s" , me.c_str (), types[i], part.c_str ());
144
+ i += 1 ;
145
+ }
146
+ }
147
+
148
+ };
149
+
86
150
inline bool chaton_meta_load (std::string &fname) {
87
151
std::ifstream f (fname);
88
152
conMeta = json::parse (f);
@@ -93,6 +157,7 @@ inline bool chaton_meta_load(std::string &fname) {
93
157
// Return user-prefix + msg + user-suffix
94
158
// NOTE: This currently doesn't return information about which parts of the tagged message are tags and which parts are the user message
95
159
inline std::string chaton_tmpl_apply_single (const std::string &tmpl, const std::string &role, const std::string &content) {
160
+ ChatParts cp = {};
96
161
std::stringstream ss;
97
162
std::string begin = " " ;
98
163
try {
@@ -102,8 +167,18 @@ inline std::string chaton_tmpl_apply_single(const std::string &tmpl, const std::
102
167
}
103
168
std::string prefix = conMeta[tmpl][role][K_PREFIX];
104
169
std::string suffix = conMeta[tmpl][role][K_SUFFIX];
170
+ cp.add_part (ChatParts::S, begin);
171
+ cp.add_part (ChatParts::S, prefix);
172
+ cp.add_part (ChatParts::N, content);
173
+ cp.add_part (ChatParts::S, suffix);
174
+ cp.dump ();
105
175
ss << begin << prefix << content << suffix;
106
176
std::string taggedStr = ss.str ();
177
+ std::string cpStr = cp.str ();
178
+ if (taggedStr != cpStr) {
179
+ LOG_TEELN (" DBUG:%s:Mismatch between CP[%s] and SS[%s]" , __func__, cpStr.c_str (), taggedStr.c_str ());
180
+ exit (2 );
181
+ }
107
182
LOGLN (" DBUG:%s:%s:%s:%s" , __func__, tmpl.c_str (), role.c_str (), taggedStr.c_str ());
108
183
return taggedStr;
109
184
}
0 commit comments