GNUCgicc: cgicc/CgiUtils.cpp Source File

00001 /* -*-mode:c++; c-file-style: "gnu";-*- */
00002 /*
00003  *  $Id: CgiUtils_8cpp-source.html,v 1.1 2007/07/03 19:33:51 sebdiaz Exp $
00004  *
00005  *  Copyright (C) 1996 - 2004 Stephen F. Booth <sbooth@gnu.org>
00006  *                       2007 Sebastien DIAZ <sebastien.diaz@gmail.com>
00007  *  Part of the GNU cgicc library, http://www.gnu.org/software/cgicc
00008  *
00009  *  This library is free software; you can redistribute it and/or
00010  *  modify it under the terms of the GNU Lesser General Public
00011  *  License as published by the Free Software Foundation; either
00012  *  version 3 of the License, or (at your option) any later version.
00013  *
00014  *  This library is distributed in the hope that it will be useful,
00015  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017  *  Lesser General Public License for more details.
00018  *
00019  *  You should have received a copy of the GNU Lesser General Public
00020  *  License along with this library; if not, write to the Free Software
00021  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA 
00022  */
00023 
00024 #ifdef __GNUG__
00025 #  pragma implementation
00026 #endif
00027 
00028 #include <stdexcept>
00029 #include <memory>
00030 #include <vector>
00031 #include <iterator>     // for distance
00032 #include <cctype>       // for toupper, isxdigit
00033 
00034 #include "cgicc/CgiUtils.h"
00035 
00036 // case-insensitive string comparison
00037 // This code based on code from 
00038 // "The C++ Programming Language, Third Edition" by Bjarne Stroustrup
00039 bool 
00040 cgicc::stringsAreEqual(const std::string& s1, 
00041                        const std::string& s2)
00042 {
00043   std::string::const_iterator p1 = s1.begin();
00044   std::string::const_iterator p2 = s2.begin();
00045   std::string::const_iterator l1 = s1.end();
00046   std::string::const_iterator l2 = s2.end();
00047 
00048   while(p1 != l1 && p2 != l2) {
00049     if(std::toupper(*(p1++)) != std::toupper(*(p2++)))
00050       return false;
00051   }
00052 
00053   return (s2.size() == s1.size()) ? true : false;
00054 }
00055 
00056 // case-insensitive string comparison
00057 bool 
00058 cgicc::stringsAreEqual(const std::string& s1, 
00059                        const std::string& s2,
00060                        size_t n)
00061 {
00062   std::string::const_iterator p1 = s1.begin();
00063   std::string::const_iterator p2 = s2.begin();
00064   bool good = (n <= s1.length() && n <= s2.length());
00065   std::string::const_iterator l1 = good ? (s1.begin() + n) : s1.end();
00066   std::string::const_iterator l2 = good ? (s2.begin() + n) : s2.end();
00067 
00068   while(p1 != l1 && p2 != l2) {
00069     if(std::toupper(*(p1++)) != std::toupper(*(p2++)))
00070       return false;
00071   }
00072   
00073   return good;
00074 }
00075 
00076 std::string
00077 cgicc::charToHex(char c)
00078 {
00079   std::string result;
00080   char first, second;
00081 
00082   first = (c & 0xF0) / 16;
00083   first += first > 9 ? 'A' - 10 : '0';
00084   second = c & 0x0F;
00085   second += second > 9 ? 'A' - 10 : '0';
00086 
00087   result.append(1, first);
00088   result.append(1, second);
00089   
00090   return result;
00091 }
00092 
00093 char
00094 cgicc::hexToChar(char first,
00095                  char second)
00096 {
00097   int digit;
00098   
00099   digit = (first >= 'A' ? ((first & 0xDF) - 'A') + 10 : (first - '0'));
00100   digit *= 16;
00101   digit += (second >= 'A' ? ((second & 0xDF) - 'A') + 10 : (second - '0'));
00102   return static_cast<char>(digit);
00103 }
00104 
00105 /* 
00106    From the HTML standard: 
00107    <http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1>
00108 
00109    application/x-www-form-urlencoded  
00110 
00111    This is the default content type. Forms submitted with this content
00112    type must be encoded as follows:
00113 
00114    1. Control names and values are escaped. Space characters are
00115    replaced by `+', and then reserved characters are escaped as
00116    described in [RFC1738], section 2.2: Non-alphanumeric characters
00117    are replaced by `%HH', a percent sign and two hexadecimal digits
00118    representing the ASCII code of the character. Line breaks are
00119    represented as "CR LF" pairs (i.e., `%0D%0A').  
00120    2. The control names/values are listed in the order they appear in
00121    the document. The name is separated from the value by `=' and
00122    name/value pairs are separated from each other by `&'.
00123 
00124 
00125    Note RFC 1738 is obsoleted by RFC 2396.  Basically it says to
00126    escape out the reserved characters in the standard %xx format.  It
00127    also says this about the query string:
00128    
00129    query         = *uric
00130    uric          = reserved | unreserved | escaped
00131    reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
00132    "$" | ","
00133    unreserved    = alphanum | mark
00134    mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
00135    "(" | ")"
00136    escaped = "%" hex hex */
00137  
00138 std::string
00139 cgicc::form_urlencode(const std::string& src)
00140 {
00141   std::string result;
00142   std::string::const_iterator iter;
00143   
00144   for(iter = src.begin(); iter != src.end(); ++iter) {
00145     switch(*iter) {
00146     case ' ':
00147       result.append(1, '+');
00148       break;
00149       // alnum
00150     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
00151     case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
00152     case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
00153     case 'V': case 'W': case 'X': case 'Y': case 'Z':
00154     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
00155     case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
00156     case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
00157     case 'v': case 'w': case 'x': case 'y': case 'z':
00158     case '0': case '1': case '2': case '3': case '4': case '5': case '6':
00159     case '7': case '8': case '9':
00160       // mark
00161     case '-': case '_': case '.': case '!': case '~': case '*': case '\'': 
00162     case '(': case ')':
00163       result.append(1, *iter);
00164       break;
00165       // escape
00166     default:
00167       result.append(1, '%');
00168       result.append(charToHex(*iter));
00169       break;
00170     }
00171   }
00172   
00173   return result;
00174 }
00175 
00176 std::string
00177 cgicc::form_urldecode(const std::string& src)
00178 {
00179   std::string result;
00180   std::string::const_iterator iter;
00181   char c;
00182 
00183   for(iter = src.begin(); iter != src.end(); ++iter) {
00184     switch(*iter) {
00185     case '+':
00186       result.append(1, ' ');
00187       break;
00188     case '%':
00189       // Don't assume well-formed input
00190       if(std::distance(iter, src.end()) >= 2
00191          && std::isxdigit(*(iter + 1)) && std::isxdigit(*(iter + 2))) {
00192         c = *++iter;
00193         result.append(1, hexToChar(c, *++iter));
00194       }
00195       // Just pass the % through untouched
00196       else {
00197         result.append(1, '%');
00198       }
00199       break;
00200     
00201     default:
00202       result.append(1, *iter);
00203       break;
00204     }
00205   }
00206   
00207   return result;
00208 }
00209 
00210 // locate data between separators, and return it
00211 std::string
00212 cgicc::extractBetween(const std::string& data, 
00213                       const std::string& separator1, 
00214                       const std::string& separator2)
00215 {
00216   std::string result;
00217   std::string::size_type start, limit;
00218   
00219   start = data.find(separator1, 0);
00220   if(std::string::npos != start) {
00221     start += separator1.length();
00222     limit = data.find(separator2, start);
00223     if(std::string::npos != limit)
00224       result = data.substr(start, limit - start);
00225   }
00226   
00227   return result;
00228 }
00229 
00230 // write a string
00231 void 
00232 cgicc::writeString(std::ostream& out, 
00233                    const std::string& s)
00234 { 
00235   out << s.length() << ' ';
00236   out.write(s.data(), s.length()); 
00237 }
00238 
00239 // write a long
00240 void 
00241 cgicc::writeLong(std::ostream& out, 
00242                  unsigned long l)
00243 { 
00244   out << l << ' '; 
00245 }
00246 
00247 // read a string
00248 std::string
00249 cgicc::readString(std::istream& in)
00250 {
00251   std::string::size_type dataSize = 0;
00252   
00253   in >> dataSize;
00254   in.get(); // skip ' '
00255   
00256   // Avoid allocation of a zero-length vector
00257   if(0 == dataSize) {
00258     return std::string();
00259   }
00260 
00261   // Don't use auto_ptr, but vector instead
00262   // Bug reported by bostjan@optonline.net / fix by alexoss@verizon.net
00263   std::vector<char> temp(dataSize);
00264 
00265   in.read(&temp[0], dataSize);
00266   if(static_cast<std::string::size_type>(in.gcount()) != dataSize) {
00267     throw std::runtime_error("I/O error");
00268   }
00269 
00270   return std::string(&temp[0], dataSize);
00271 }
00272 
00273 // read a long
00274 unsigned long
00275 cgicc::readLong(std::istream& in)
00276 {
00277   unsigned long l;
00278 
00279   in >> l;
00280   in.get(); // skip ' '
00281   return l;
00282 }