00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059 #include <ctype.h>
00060
00061 #include <iomanip>
00062 #include <string>
00063 #include <sstream>
00064
00065 #include "GNURegex.h"
00066 #include "Error.h"
00067 #include "InternalErr.h"
00068
00069 #include "debug.h"
00070
00071 using namespace std;
00072
00073 namespace libdap {
00074
00075
00076
00077
00078
/**
 * Format a byte as a two-digit, zero-padded, lowercase hexadecimal string.
 *
 * @param val The byte value to format.
 * @return Exactly two hex digits, e.g. 10 yields "0a" and 255 yields "ff".
 */
std::string
hexstring(unsigned char val)
{
    std::ostringstream out;

    // Configure the stream directly instead of chaining manipulators.
    out.setf(std::ios_base::hex, std::ios_base::basefield);
    out.fill('0');
    out.width(2);

    // Widen to an integer so the value is printed as a number, not a character.
    out << static_cast<unsigned int>(val);

    return out.str();
}
00088
/**
 * Convert a string of hexadecimal digits into the single character that has
 * that numeric value.
 *
 * @param s Text holding a hexadecimal number (e.g. "41").
 * @return A one-character string holding the decoded character. The result
 * is built from a NUL-terminated buffer, so a decoded value of zero (which
 * is also what an unparsable or empty input produces) yields an empty string.
 */
std::string
unhexstring(std::string s)
{
    // Start from a known value: if the extraction fails, older (pre-C++11)
    // libraries leave the target untouched, which previously meant reading
    // an indeterminate int.
    int val = 0;
    std::istringstream ss(s);
    ss >> std::hex >> val;

    char tmp_str[2];
    tmp_str[0] = static_cast<char>(val);
    tmp_str[1] = '\0';
    return std::string(tmp_str);
}
00100
/**
 * Format a byte as a three-digit, zero-padded octal string.
 *
 * @param val The byte value to format.
 * @return Exactly three octal digits, e.g. 10 yields "012" and 255 yields
 * "377".
 */
std::string
octstring(unsigned char val)
{
    std::ostringstream out;

    // Configure the stream directly instead of chaining manipulators.
    out.setf(std::ios_base::oct, std::ios_base::basefield);
    out.fill('0');
    out.width(3);

    // Widen to an integer so the value is printed as a number, not a character.
    out << static_cast<unsigned int>(val);

    return out.str();
}
00110
00111 string
00112 unoctstring(string s)
00113 {
00114 int val;
00115
00116 istringstream ss(s);
00117 ss >> oct >> val;
00118
00119 DBG(cerr << "unoctstring: " << val << endl);
00120
00121 char tmp_str[2];
00122 tmp_str[0] = static_cast<char>(val);
00123 tmp_str[1] = '\0';
00124 return string(tmp_str);
00125 }
00126
00151 string
00152 id2www(string in, const string &allowable)
00153 {
00154 string::size_type i = 0;
00155
00156 while ((i = in.find_first_not_of(allowable, i)) != string::npos) {
00157 in.replace(i, 1, "%" + hexstring(in[i]));
00158 i++;
00159 }
00160
00161 return in;
00162 }
00163
00174 string
00175 id2www_ce(string in, const string &allowable)
00176 {
00177 return id2www(in, allowable);
00178 }
00179
00212 string
00213 www2id(const string &in, const string &escape, const string &except)
00214 {
00215 string::size_type i = 0;
00216 string res = in;
00217 while ((i = res.find_first_of(escape, i)) != string::npos) {
00218 if (except.find(res.substr(i, 3)) != string::npos) {
00219 i += 3;
00220 continue;
00221 }
00222 res.replace(i, 3, unhexstring(res.substr(i + 1, 2)));
00223 }
00224
00225 return res;
00226 }
00227
00228 static string
00229 entity(char c)
00230 {
00231 switch (c) {
00232 case '>': return ">";
00233 case '<': return "<";
00234 case '&': return "&";
00235 case '\'': return "'";
00236 case '\"': return """;
00237 default:
00238 throw InternalErr(__FILE__, __LINE__, "Unrecognized character.");
00239 }
00240 }
00241
00248 string
00249 id2xml(string in, const string ¬_allowed)
00250 {
00251 string::size_type i = 0;
00252
00253 while ((i = in.find_first_of(not_allowed, i)) != string::npos) {
00254 in.replace(i, 1, entity(in[i]));
00255 i++;
00256 }
00257
00258 return in;
00259 }
00260
/**
 * Replace the five predefined XML entity references (for '>', '<', '&',
 * '\'' and '"') in @p in with the characters they stand for.
 *
 * The text is decoded in a single left-to-right pass, so the output of one
 * replacement is never decoded again: an escaped ampersand followed by
 * "quot;" yields the literal text of the quote entity, not a quote
 * character. Any other use of '&' is left untouched.
 *
 * @param in The text to decode (taken by value and edited in place).
 * @return The decoded text.
 */
std::string
xml2id(std::string in)
{
    struct EntityMapping {
        const char *text;               // entity reference as it appears in the input
        std::string::size_type length;  // number of characters in 'text'
        char replacement;               // character the entity stands for
    };
    static const EntityMapping mappings[] = {
        { "&gt;",   4, '>'  },
        { "&lt;",   4, '<'  },
        { "&amp;",  5, '&'  },
        { "&apos;", 6, '\'' },
        { "&quot;", 6, '"'  }
    };
    const std::string::size_type mapping_count = sizeof(mappings) / sizeof(mappings[0]);

    std::string::size_type i = 0;
    while ((i = in.find('&', i)) != std::string::npos) {
        for (std::string::size_type k = 0; k < mapping_count; ++k) {
            // compare() clamps the length at the end of the string, so a
            // truncated entity at the tail simply fails to match.
            if (in.compare(i, mappings[k].length, mappings[k].text) == 0) {
                in.replace(i, mappings[k].length, 1, mappings[k].replacement);
                break;
            }
        }
        // Move past either the decoded character or an unrelated '&'.
        ++i;
    }

    return in;
}
00292
/**
 * Replace every '%' and the (up to) two characters that follow it with a
 * single underscore.
 *
 * @param s The text to transform (taken by value and edited in place).
 * @return The transformed text.
 */
std::string
esc2underscore(std::string s)
{
    // Each replacement removes the '%' it matched, so searching from the
    // start again always finds the next remaining one.
    for (std::string::size_type at = s.find('%');
         at != std::string::npos;
         at = s.find('%')) {
        s.replace(at, 3, "_");
    }

    return s;
}
00307
00308
00312 string
00313 escattr(string s)
00314 {
00315 const string printable = " ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789~`!@#$%^&*()_-+={[}]|\\:;<,>.?/'\"";
00316 const string ESC = "\\";
00317 const string DOUBLE_ESC = ESC + ESC;
00318 const string QUOTE = "\"";
00319 const string ESCQUOTE = ESC + QUOTE;
00320
00321
00322 string::size_type ind = 0;
00323 while ((ind = s.find_first_not_of(printable, ind)) != s.npos)
00324 s.replace(ind, 1, ESC + octstring(s[ind]));
00325
00326
00327 ind = 0;
00328 while ((ind = s.find(ESC, ind)) != s.npos) {
00329 s.replace(ind, 1, DOUBLE_ESC);
00330 ind += DOUBLE_ESC.length();
00331 }
00332
00333
00334 ind = 0;
00335 while ((ind = s.find(QUOTE, ind)) != s.npos) {
00336 s.replace(ind, 1, ESCQUOTE);
00337 ind += ESCQUOTE.length();
00338 }
00339
00340 return s;
00341 }
00342
00351 string
00352 unescattr(string s)
00353 {
00354 Regex octal("\\\\[0-3][0-7][0-7]");
00355 Regex esc_quote("\\\\\"");
00356 Regex esc_esc("\\\\\\\\");
00357 const string ESC = "\\";
00358 const string QUOTE = "\"";
00359 int matchlen;
00360 unsigned int index;
00361
00362 DBG(cerr << "0XX" << s << "XXX" << endl);
00363
00364 index = esc_esc.search(s.c_str(), s.length(), matchlen, 0);
00365 while (index < s.length()) {
00366 DBG(cerr << "1aXX" << s << "XXX index: " << index << endl);
00367 s.replace(index, 2, ESC);
00368 DBG(cerr << "1bXX" << s << "XXX index: " << index << endl);
00369 index = esc_esc.search(s.c_str(), s.length(), matchlen, 0);
00370 }
00371
00372
00373 index = esc_quote.search(s.c_str(), s.length(), matchlen, 0);
00374 while (index < s.length()) {
00375 s.replace(index, 2, QUOTE);
00376 DBG(cerr << "2XX" << s << "XXX index: " << index << endl);
00377 index = esc_quote.search(s.c_str(), s.length(), matchlen, 0);
00378 }
00379
00380
00381 index = octal.search(s.c_str(), s.length(), matchlen, 0);
00382 while (index < s.length()) {
00383 s.replace(index, 4, unoctstring(s.substr(index + 1, 3)));
00384 DBG(cerr << "3XX" << s << "XXX index: " << index << endl);
00385 index = octal.search(s.c_str(), s.length(), matchlen, 0);
00386 }
00387
00388 DBG(cerr << "4XX" << s << "XXX" << endl);
00389 return s;
00390 }
00391
/**
 * Wrap a message in double quotes and backslash-escape any unescaped double
 * quote between the opening and closing quote.
 *
 * A leading quote is added unless the message already starts with one, and
 * a trailing quote is added unless it already ends with one. A quote that is
 * already preceded by a backslash is left alone. Empty and one-character
 * messages are handled explicitly, so the result always has a distinct
 * opening and closing quote.
 *
 * @param msg The message text (taken by value and edited in place).
 * @return The quoted, escaped message.
 */
std::string
munge_error_message(std::string msg)
{
    // Opening quote. The emptiness check avoids dereferencing begin() on an
    // empty string.
    if (msg.empty() || msg[0] != '"')
        msg.insert(msg.begin(), '"');

    // Closing quote. The length check ensures a lone '"' is not counted as
    // both the opening and the closing quote, which previously let the
    // escaping loop start past the end of the string.
    if (msg.length() < 2 || msg[msg.length() - 1] != '"')
        msg += "\"";

    // Escape interior quotes; the first and last characters are the
    // delimiters and are skipped.
    for (std::string::size_type i = 1; i + 1 < msg.length(); ++i) {
        if (msg[i] == '"' && msg[i - 1] != '\\') {
            msg.insert(i, 1, '\\');
            ++i;  // step over the quote that was just escaped
        }
    }

    return msg;
}
00409
00410 }
00411