diff options
Diffstat (limited to '3rdParty/Breakpad/src/common')
57 files changed, 15137 insertions, 0 deletions
diff --git a/3rdParty/Breakpad/src/common/byte_cursor.h b/3rdParty/Breakpad/src/common/byte_cursor.h new file mode 100644 index 0000000..accd54e --- /dev/null +++ b/3rdParty/Breakpad/src/common/byte_cursor.h @@ -0,0 +1,265 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2010, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// byte_cursor.h: Classes for parsing values from a buffer of bytes. +// The ByteCursor class provides a convenient interface for reading +// fixed-size integers of arbitrary endianness, being thorough about +// checking for buffer overruns. + +#ifndef COMMON_BYTE_CURSOR_H_ +#define COMMON_BYTE_CURSOR_H_ + +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <string> + +#include "common/using_std_string.h" + +namespace google_breakpad { + +// A buffer holding a series of bytes. +struct ByteBuffer { + ByteBuffer() : start(0), end(0) { } + ByteBuffer(const uint8_t *set_start, size_t set_size) + : start(set_start), end(set_start + set_size) { } + ~ByteBuffer() { }; + + // Equality operators. Useful in unit tests, and when we're using + // ByteBuffers to refer to regions of a larger buffer. + bool operator==(const ByteBuffer &that) const { + return start == that.start && end == that.end; + } + bool operator!=(const ByteBuffer &that) const { + return start != that.start || end != that.end; + } + + // Not C++ style guide compliant, but this definitely belongs here. + size_t Size() const { + assert(start <= end); + return end - start; + } + + const uint8_t *start, *end; +}; + +// A cursor pointing into a ByteBuffer that can parse numbers of various +// widths and representations, strings, and data blocks, advancing through +// the buffer as it goes. All ByteCursor operations check that accesses +// haven't gone beyond the end of the enclosing ByteBuffer. +class ByteCursor { + public: + // Create a cursor reading bytes from the start of BUFFER. By default, the + // cursor reads multi-byte values in little-endian form. + ByteCursor(const ByteBuffer *buffer, bool big_endian = false) + : buffer_(buffer), here_(buffer->start), + big_endian_(big_endian), complete_(true) { } + + // Accessor and setter for this cursor's endianness flag. + bool big_endian() const { return big_endian_; } + void set_big_endian(bool big_endian) { big_endian_ = big_endian; } + + // Accessor and setter for this cursor's current position. The setter + // returns a reference to this cursor. + const uint8_t *here() const { return here_; } + ByteCursor &set_here(const uint8_t *here) { + assert(buffer_->start <= here && here <= buffer_->end); + here_ = here; + return *this; + } + + // Return the number of bytes available to read at the cursor. + size_t Available() const { return size_t(buffer_->end - here_); } + + // Return true if this cursor is at the end of its buffer. + bool AtEnd() const { return Available() == 0; } + + // When used as a boolean value this cursor converts to true if all + // prior reads have been completed, or false if we ran off the end + // of the buffer. + operator bool() const { return complete_; } + + // Read a SIZE-byte integer at this cursor, signed if IS_SIGNED is true, + // unsigned otherwise, using the cursor's established endianness, and set + // *RESULT to the number. If we read off the end of our buffer, clear + // this cursor's complete_ flag, and store a dummy value in *RESULT. + // Return a reference to this cursor. + template<typename T> + ByteCursor &Read(size_t size, bool is_signed, T *result) { + if (CheckAvailable(size)) { + T v = 0; + if (big_endian_) { + for (size_t i = 0; i < size; i++) + v = (v << 8) + here_[i]; + } else { + // This loop condition looks weird, but size_t is unsigned, so + // decrementing i after it is zero yields the largest size_t value. + for (size_t i = size - 1; i < size; i--) + v = (v << 8) + here_[i]; + } + if (is_signed && size < sizeof(T)) { + size_t sign_bit = (T)1 << (size * 8 - 1); + v = (v ^ sign_bit) - sign_bit; + } + here_ += size; + *result = v; + } else { + *result = (T) 0xdeadbeef; + } + return *this; + } + + // Read an integer, using the cursor's established endianness and + // *RESULT's size and signedness, and set *RESULT to the number. If we + // read off the end of our buffer, clear this cursor's complete_ flag. + // Return a reference to this cursor. + template<typename T> + ByteCursor &operator>>(T &result) { + bool T_is_signed = (T)-1 < 0; + return Read(sizeof(T), T_is_signed, &result); + } + + // Copy the SIZE bytes at the cursor to BUFFER, and advance this + // cursor to the end of them. If we read off the end of our buffer, + // clear this cursor's complete_ flag, and set *POINTER to NULL. + // Return a reference to this cursor. + ByteCursor &Read(uint8_t *buffer, size_t size) { + if (CheckAvailable(size)) { + memcpy(buffer, here_, size); + here_ += size; + } + return *this; + } + + // Set STR to a copy of the '\0'-terminated string at the cursor. If the + // byte buffer does not contain a terminating zero, clear this cursor's + // complete_ flag, and set STR to the empty string. Return a reference to + // this cursor. + ByteCursor &CString(string *str) { + const uint8_t *end + = static_cast<const uint8_t *>(memchr(here_, '\0', Available())); + if (end) { + str->assign(reinterpret_cast<const char *>(here_), end - here_); + here_ = end + 1; + } else { + str->clear(); + here_ = buffer_->end; + complete_ = false; + } + return *this; + } + + // Like CString(STR), but extract the string from a fixed-width buffer + // LIMIT bytes long, which may or may not contain a terminating '\0' + // byte. Specifically: + // + // - If there are not LIMIT bytes available at the cursor, clear the + // cursor's complete_ flag and set STR to the empty string. + // + // - Otherwise, if the LIMIT bytes at the cursor contain any '\0' + // characters, set *STR to a copy of the bytes before the first '\0', + // and advance the cursor by LIMIT bytes. + // + // - Otherwise, set *STR to a copy of those LIMIT bytes, and advance the + // cursor by LIMIT bytes. + ByteCursor &CString(string *str, size_t limit) { + if (CheckAvailable(limit)) { + const uint8_t *end + = static_cast<const uint8_t *>(memchr(here_, '\0', limit)); + if (end) + str->assign(reinterpret_cast<const char *>(here_), end - here_); + else + str->assign(reinterpret_cast<const char *>(here_), limit); + here_ += limit; + } else { + str->clear(); + } + return *this; + } + + // Set *POINTER to point to the SIZE bytes at the cursor, and advance + // this cursor to the end of them. If SIZE is omitted, don't move the + // cursor. If we read off the end of our buffer, clear this cursor's + // complete_ flag, and set *POINTER to NULL. Return a reference to this + // cursor. + ByteCursor &PointTo(const uint8_t **pointer, size_t size = 0) { + if (CheckAvailable(size)) { + *pointer = here_; + here_ += size; + } else { + *pointer = NULL; + } + return *this; + } + + // Skip SIZE bytes at the cursor. If doing so would advance us off + // the end of our buffer, clear this cursor's complete_ flag, and + // set *POINTER to NULL. Return a reference to this cursor. + ByteCursor &Skip(size_t size) { + if (CheckAvailable(size)) + here_ += size; + return *this; + } + + private: + // If there are at least SIZE bytes available to read from the buffer, + // return true. Otherwise, set here_ to the end of the buffer, set + // complete_ to false, and return false. + bool CheckAvailable(size_t size) { + if (Available() >= size) { + return true; + } else { + here_ = buffer_->end; + complete_ = false; + return false; + } + } + + // The buffer we're reading bytes from. + const ByteBuffer *buffer_; + + // The next byte within buffer_ that we'll read. + const uint8_t *here_; + + // True if we should read numbers in big-endian form; false if we + // should read in little-endian form. + bool big_endian_; + + // True if we've been able to read all we've been asked to. + bool complete_; +}; + +} // namespace google_breakpad + +#endif // COMMON_BYTE_CURSOR_H_ diff --git a/3rdParty/Breakpad/src/common/convert_UTF.c b/3rdParty/Breakpad/src/common/convert_UTF.c new file mode 100644 index 0000000..80178d3 --- /dev/null +++ b/3rdParty/Breakpad/src/common/convert_UTF.c @@ -0,0 +1,533 @@ +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* --------------------------------------------------------------------- + +Conversions between UTF32, UTF-16, and UTF-8. Source code file. +Author: Mark E. Davis, 1994. +Rev History: Rick McGowan, fixes & updates May 2001. +Sept 2001: fixed const & error conditions per +mods suggested by S. Parent & A. Lillich. +June 2002: Tim Dodd added detection and handling of incomplete +source sequences, enhanced error detection, added casts +to eliminate compiler warnings. +July 2003: slight mods to back out aggressive FFFE detection. +Jan 2004: updated switches in from-UTF8 conversions. +Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions. + +See the header file "ConvertUTF.h" for complete documentation. + +------------------------------------------------------------------------ */ + + +#include "convert_UTF.h" +#ifdef CVTUTF_DEBUG +#include <stdio.h> +#endif + +static const int halfShift = 10; /* used for shifting by 10 bits */ + +static const UTF32 halfBase = 0x0010000UL; +static const UTF32 halfMask = 0x3FFUL; + +#define UNI_SUR_HIGH_START (UTF32)0xD800 +#define UNI_SUR_HIGH_END (UTF32)0xDBFF +#define UNI_SUR_LOW_START (UTF32)0xDC00 +#define UNI_SUR_LOW_END (UTF32)0xDFFF +#define false 0 +#define true 1 + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF32toUTF16 (const UTF32** sourceStart, const UTF32* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF32* source = *sourceStart; + UTF16* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + if (target >= targetEnd) { + result = targetExhausted; break; + } + ch = *source++; + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = (UTF16)ch; /* normal case */ + } + } else if (ch > UNI_MAX_LEGAL_UTF32) { + if (flags == strictConversion) { + result = sourceIllegal; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + if (target + 1 >= targetEnd) { + --source; /* Back up source pointer! */ + result = targetExhausted; break; + } + ch -= halfBase; + *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); + *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); + } + } +*sourceStart = source; +*targetStart = target; +return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF16toUTF32 (const UTF16** sourceStart, const UTF16* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF16* source = *sourceStart; + UTF32* target = *targetStart; + UTF32 ch, ch2; + while (source < sourceEnd) { + const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ + ch = *source++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source buffer... */ + if (source < sourceEnd) { + ch2 = *source; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + ++source; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } else { /* We don't have the 16 bits following the high surrogate. */ + --source; /* return to the high surrogate */ + result = sourceExhausted; + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + if (target >= targetEnd) { + source = oldSource; /* Back up source pointer! */ + result = targetExhausted; break; + } + *target++ = ch; + } + *sourceStart = source; + *targetStart = target; +#ifdef CVTUTF_DEBUG + if (result == sourceIllegal) { + fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2); + fflush(stderr); + } +#endif + return result; +} + +/* --------------------------------------------------------------------- */ + +/* + * Index into the table below with the first byte of a UTF-8 sequence to + * get the number of trailing bytes that are supposed to follow it. + * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is + * left as-is for anyone who may want to do such conversion, which was + * allowed in earlier algorithms. + */ +static const char trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; + +/* + * Magic values subtracted from a buffer value during UTF8 conversion. + * This table contains as many values as there might be trailing bytes + * in a UTF-8 sequence. + */ +static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; + +/* + * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed + * into the first byte, depending on how many bytes follow. There are + * as many entries in this table as there are UTF-8 sequence types. + * (I.e., one byte sequence, two byte... etc.). Remember that sequencs + * for *legal* UTF-8 will be 4 or fewer bytes total. + */ +static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + +/* --------------------------------------------------------------------- */ + +/* The interface converts a whole buffer to avoid function-call overhead. +* Constants have been gathered. Loops & conditionals have been removed as +* much as possible for efficiency, in favor of drop-through switches. +* (See "Note A" at the bottom of the file for equivalent code.) +* If your compiler supports it, the "isLegalUTF8" call can be turned +* into an inline function. +*/ + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF16toUTF8 (const UTF16** sourceStart, const UTF16* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF16* source = *sourceStart; + UTF8* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + unsigned short bytesToWrite = 0; + const UTF32 byteMask = 0xBF; + const UTF32 byteMark = 0x80; + const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ + ch = *source++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source buffer... */ + if (source < sourceEnd) { + UTF32 ch2 = *source; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + ++source; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } else { /* We don't have the 16 bits following the high surrogate. */ + --source; /* return to the high surrogate */ + result = sourceExhausted; + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + /* Figure out how many bytes the result will require */ + if (ch < (UTF32)0x80) { bytesToWrite = 1; + } else if (ch < (UTF32)0x800) { bytesToWrite = 2; + } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; + } else if (ch < (UTF32)0x110000) { bytesToWrite = 4; + } else { bytesToWrite = 3; + ch = UNI_REPLACEMENT_CHAR; + } + + target += bytesToWrite; + if (target > targetEnd) { + source = oldSource; /* Back up source pointer! */ + target -= bytesToWrite; result = targetExhausted; break; + } + switch (bytesToWrite) { /* note: everything falls through. */ + case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]); + } + target += bytesToWrite; + } +*sourceStart = source; +*targetStart = target; +return result; +} + +/* --------------------------------------------------------------------- */ + +/* + * Utility routine to tell whether a sequence of bytes is legal UTF-8. + * This must be called with the length pre-determined by the first byte. + * If not calling this from ConvertUTF8to*, then the length can be set by: + * length = trailingBytesForUTF8[*source]+1; + * and the sequence is illegal right away if there aren't that many bytes + * available. + * If presented with a length > 4, this returns false. The Unicode + * definition of UTF-8 goes up to 4-byte sequences. + */ + +static Boolean isLegalUTF8(const UTF8 *source, int length) { + UTF8 a; + const UTF8 *srcptr = source+length; + switch (length) { + default: return false; + /* Everything else falls through when "true"... */ + case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + case 2: if ((a = (*--srcptr)) > 0xBF) return false; + + switch (*source) { + /* no fall-through in this inner switch */ + case 0xE0: if (a < 0xA0) return false; break; + case 0xED: if (a > 0x9F) return false; break; + case 0xF0: if (a < 0x90) return false; break; + case 0xF4: if (a > 0x8F) return false; break; + default: if (a < 0x80) return false; + } + + case 1: if (*source >= 0x80 && *source < 0xC2) return false; + } + if (*source > 0xF4) return false; + return true; +} + +/* --------------------------------------------------------------------- */ + +/* + * Exported function to return whether a UTF-8 sequence is legal or not. + * This is not used here; it's just exported. + */ +Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) { + int length = trailingBytesForUTF8[*source]+1; + if (source+length > sourceEnd) { + return false; + } + return isLegalUTF8(source, length); +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF8toUTF16 (const UTF8** sourceStart, const UTF8* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF8* source = *sourceStart; + UTF16* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch = 0; + unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; + if (source + extraBytesToRead >= sourceEnd) { + result = sourceExhausted; break; + } + /* Do this check whether lenient or strict */ + if (! isLegalUTF8(source, extraBytesToRead+1)) { + result = sourceIllegal; + break; + } + /* + * The cases all fall through. See "Note A" below. + */ + switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 3: ch += *source++; ch <<= 6; + case 2: ch += *source++; ch <<= 6; + case 1: ch += *source++; ch <<= 6; + case 0: ch += *source++; + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (target >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up source pointer! */ + result = targetExhausted; break; + } + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + source -= (extraBytesToRead+1); /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = (UTF16)ch; /* normal case */ + } + } else if (ch > UNI_MAX_UTF16) { + if (flags == strictConversion) { + result = sourceIllegal; + source -= (extraBytesToRead+1); /* return to the start */ + break; /* Bail out; shouldn't continue */ + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + if (target + 1 >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up source pointer! */ + result = targetExhausted; break; + } + ch -= halfBase; + *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); + *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); + } + } +*sourceStart = source; +*targetStart = target; +return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF32toUTF8 (const UTF32** sourceStart, const UTF32* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF32* source = *sourceStart; + UTF8* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + unsigned short bytesToWrite = 0; + const UTF32 byteMask = 0xBF; + const UTF32 byteMark = 0x80; + ch = *source++; + if (flags == strictConversion ) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + /* + * Figure out how many bytes the result will require. Turn any + * illegally large UTF32 things (> Plane 17) into replacement chars. + */ + if (ch < (UTF32)0x80) { bytesToWrite = 1; + } else if (ch < (UTF32)0x800) { bytesToWrite = 2; + } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; + } else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4; + } else { bytesToWrite = 3; + ch = UNI_REPLACEMENT_CHAR; + result = sourceIllegal; + } + + target += bytesToWrite; + if (target > targetEnd) { + --source; /* Back up source pointer! */ + target -= bytesToWrite; result = targetExhausted; break; + } + switch (bytesToWrite) { /* note: everything falls through. */ + case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]); + } + target += bytesToWrite; + } +*sourceStart = source; +*targetStart = target; +return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF8toUTF32 (const UTF8** sourceStart, const UTF8* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF8* source = *sourceStart; + UTF32* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch = 0; + unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; + if (source + extraBytesToRead >= sourceEnd) { + result = sourceExhausted; break; + } + /* Do this check whether lenient or strict */ + if (! isLegalUTF8(source, extraBytesToRead+1)) { + result = sourceIllegal; + break; + } + /* + * The cases all fall through. See "Note A" below. + */ + switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; + case 4: ch += *source++; ch <<= 6; + case 3: ch += *source++; ch <<= 6; + case 2: ch += *source++; ch <<= 6; + case 1: ch += *source++; ch <<= 6; + case 0: ch += *source++; + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (target >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up the source pointer! */ + result = targetExhausted; break; + } + if (ch <= UNI_MAX_LEGAL_UTF32) { + /* + * UTF-16 surrogate values are illegal in UTF-32, and anything + * over Plane 17 (> 0x10FFFF) is illegal. + */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + source -= (extraBytesToRead+1); /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = ch; + } + } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */ + result = sourceIllegal; + *target++ = UNI_REPLACEMENT_CHAR; + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- + +Note A. +The fall-through switches in UTF-8 reading code save a +temp variable, some decrements & conditionals. The switches +are equivalent to the following loop: +{ + int tmpBytesToRead = extraBytesToRead+1; + do { + ch += *source++; + --tmpBytesToRead; + if (tmpBytesToRead) ch <<= 6; + } while (tmpBytesToRead > 0); +} +In UTF-8 writing code, the switches on "bytesToWrite" are +similarly unrolled loops. + +--------------------------------------------------------------------- */ diff --git a/3rdParty/Breakpad/src/common/convert_UTF.h b/3rdParty/Breakpad/src/common/convert_UTF.h new file mode 100644 index 0000000..b1556de --- /dev/null +++ b/3rdParty/Breakpad/src/common/convert_UTF.h @@ -0,0 +1,143 @@ +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* --------------------------------------------------------------------- + +Conversions between UTF32, UTF-16, and UTF-8. Header file. + +Several funtions are included here, forming a complete set of +conversions between the three formats. UTF-7 is not included +here, but is handled in a separate source file. + +Each of these routines takes pointers to input buffers and output +buffers. The input buffers are const. + +Each routine converts the text between *sourceStart and sourceEnd, +putting the result into the buffer between *targetStart and +targetEnd. Note: the end pointers are *after* the last item: e.g. +*(sourceEnd - 1) is the last item. + +The return result indicates whether the conversion was successful, +and if not, whether the problem was in the source or target buffers. +(Only the first encountered problem is indicated.) + +After the conversion, *sourceStart and *targetStart are both +updated to point to the end of last text successfully converted in +the respective buffers. + +Input parameters: +sourceStart - pointer to a pointer to the source buffer. +The contents of this are modified on return so that +it points at the next thing to be converted. +targetStart - similarly, pointer to pointer to the target buffer. +sourceEnd, targetEnd - respectively pointers to the ends of the +two buffers, for overflow checking only. + +These conversion functions take a ConversionFlags argument. When this +flag is set to strict, both irregular sequences and isolated surrogates +will cause an error. When the flag is set to lenient, both irregular +sequences and isolated surrogates are converted. + +Whether the flag is strict or lenient, all illegal sequences will cause +an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>, +or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code +must check for illegal sequences. + +When the flag is set to lenient, characters over 0x10FFFF are converted +to the replacement character; otherwise (when the flag is set to strict) +they constitute an error. + +Output parameters: +The value "sourceIllegal" is returned from some routines if the input +sequence is malformed. When "sourceIllegal" is returned, the source +value will point to the illegal value that caused the problem. E.g., +in UTF-8 when a sequence is malformed, it points to the start of the +malformed sequence. + +Author: Mark E. Davis, 1994. +Rev History: Rick McGowan, fixes & updates May 2001. +Fixes & updates, Sept 2001. + +------------------------------------------------------------------------ */ + +/* --------------------------------------------------------------------- +The following 4 definitions are compiler-specific. +The C standard does not guarantee that wchar_t has at least +16 bits, so wchar_t is no less portable than unsigned short! +All should be unsigned values to avoid sign extension during +bit mask & shift operations. +------------------------------------------------------------------------ */ + +typedef unsigned long UTF32; /* at least 32 bits */ +typedef unsigned short UTF16; /* at least 16 bits */ +typedef unsigned char UTF8; /* typically 8 bits */ +typedef unsigned char Boolean; /* 0 or 1 */ + +/* Some fundamental constants */ +#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD +#define UNI_MAX_BMP (UTF32)0x0000FFFF +#define UNI_MAX_UTF16 (UTF32)0x0010FFFF +#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF +#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF + +typedef enum { + conversionOK, /* conversion successful */ + sourceExhausted, /* partial character in source, but hit end */ + targetExhausted, /* insuff. room in target for conversion */ + sourceIllegal /* source sequence is illegal/malformed */ +} ConversionResult; + +typedef enum { + strictConversion = 0, + lenientConversion +} ConversionFlags; + +/* This is for C++ and does no harm in C */ +#ifdef __cplusplus +extern "C" { +#endif + +ConversionResult ConvertUTF8toUTF16 (const UTF8** sourceStart, const UTF8* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); + +ConversionResult ConvertUTF16toUTF8 (const UTF16** sourceStart, const UTF16* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); + +ConversionResult ConvertUTF8toUTF32 (const UTF8** sourceStart, const UTF8* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); + +ConversionResult ConvertUTF32toUTF8 (const UTF32** sourceStart, const UTF32* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); + +ConversionResult ConvertUTF16toUTF32 (const UTF16** sourceStart, const UTF16* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); + +ConversionResult ConvertUTF32toUTF16 (const UTF32** sourceStart, const UTF32* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); + +Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd); + +#ifdef __cplusplus +} +#endif + +/* --------------------------------------------------------------------- */ diff --git a/3rdParty/Breakpad/src/common/dwarf/bytereader-inl.h b/3rdParty/Breakpad/src/common/dwarf/bytereader-inl.h new file mode 100644 index 0000000..3c16708 --- /dev/null +++ b/3rdParty/Breakpad/src/common/dwarf/bytereader-inl.h @@ -0,0 +1,175 @@ +// Copyright 2006 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef UTIL_DEBUGINFO_BYTEREADER_INL_H__ +#define UTIL_DEBUGINFO_BYTEREADER_INL_H__ + +#include "common/dwarf/bytereader.h" + +#include <assert.h> + +namespace dwarf2reader { + +inline uint8 ByteReader::ReadOneByte(const char* buffer) const { + return buffer[0]; +} + +inline uint16 ByteReader::ReadTwoBytes(const char* signed_buffer) const { + const unsigned char *buffer + = reinterpret_cast<const unsigned char *>(signed_buffer); + const uint16 buffer0 = buffer[0]; + const uint16 buffer1 = buffer[1]; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8; + } else { + return buffer1 | buffer0 << 8; + } +} + +inline uint64 ByteReader::ReadFourBytes(const char* signed_buffer) const { + const unsigned char *buffer + = reinterpret_cast<const unsigned char *>(signed_buffer); + const uint32 buffer0 = buffer[0]; + const uint32 buffer1 = buffer[1]; + const uint32 buffer2 = buffer[2]; + const uint32 buffer3 = buffer[3]; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24; + } else { + return buffer3 | buffer2 << 8 | buffer1 << 16 | buffer0 << 24; + } +} + +inline uint64 ByteReader::ReadEightBytes(const char* signed_buffer) const { + const unsigned char *buffer + = reinterpret_cast<const unsigned char *>(signed_buffer); + const uint64 buffer0 = buffer[0]; + const uint64 buffer1 = buffer[1]; + const uint64 buffer2 = buffer[2]; + const uint64 buffer3 = buffer[3]; + const uint64 buffer4 = buffer[4]; + const uint64 buffer5 = buffer[5]; + const uint64 buffer6 = buffer[6]; + const uint64 buffer7 = buffer[7]; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24 | + buffer4 << 32 | buffer5 << 40 | buffer6 << 48 | buffer7 << 56; + } else { + return buffer7 | buffer6 << 8 | buffer5 << 16 | buffer4 << 24 | + buffer3 << 32 | buffer2 << 40 | buffer1 << 48 | buffer0 << 56; + } +} + +// Read an unsigned LEB128 number. Each byte contains 7 bits of +// information, plus one bit saying whether the number continues or +// not. + +inline uint64 ByteReader::ReadUnsignedLEB128(const char* buffer, + size_t* len) const { + uint64 result = 0; + size_t num_read = 0; + unsigned int shift = 0; + unsigned char byte; + + do { + byte = *buffer++; + num_read++; + + result |= (static_cast<uint64>(byte & 0x7f)) << shift; + + shift += 7; + + } while (byte & 0x80); + + *len = num_read; + + return result; +} + +// Read a signed LEB128 number. These are like regular LEB128 +// numbers, except the last byte may have a sign bit set. + +inline int64 ByteReader::ReadSignedLEB128(const char* buffer, + size_t* len) const { + int64 result = 0; + unsigned int shift = 0; + size_t num_read = 0; + unsigned char byte; + + do { + byte = *buffer++; + num_read++; + result |= (static_cast<uint64>(byte & 0x7f) << shift); + shift += 7; + } while (byte & 0x80); + + if ((shift < 8 * sizeof (result)) && (byte & 0x40)) + result |= -((static_cast<int64>(1)) << shift); + *len = num_read; + return result; +} + +inline uint64 ByteReader::ReadOffset(const char* buffer) const { + assert(this->offset_reader_); + return (this->*offset_reader_)(buffer); +} + +inline uint64 ByteReader::ReadAddress(const char* buffer) const { + assert(this->address_reader_); + return (this->*address_reader_)(buffer); +} + +inline void ByteReader::SetCFIDataBase(uint64 section_base, + const char *buffer_base) { + section_base_ = section_base; + buffer_base_ = buffer_base; + have_section_base_ = true; +} + +inline void ByteReader::SetTextBase(uint64 text_base) { + text_base_ = text_base; + have_text_base_ = true; +} + +inline void ByteReader::SetDataBase(uint64 data_base) { + data_base_ = data_base; + have_data_base_ = true; +} + +inline void ByteReader::SetFunctionBase(uint64 function_base) { + function_base_ = function_base; + have_function_base_ = true; +} + +inline void ByteReader::ClearFunctionBase() { + have_function_base_ = false; +} + +} // namespace dwarf2reader + +#endif // UTIL_DEBUGINFO_BYTEREADER_INL_H__ diff --git a/3rdParty/Breakpad/src/common/dwarf/bytereader.cc b/3rdParty/Breakpad/src/common/dwarf/bytereader.cc new file mode 100644 index 0000000..6802026 --- /dev/null +++ b/3rdParty/Breakpad/src/common/dwarf/bytereader.cc @@ -0,0 +1,245 @@ +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <assert.h> +#include <stdlib.h> + +#include "common/dwarf/bytereader-inl.h" +#include "common/dwarf/bytereader.h" + +namespace dwarf2reader { + +ByteReader::ByteReader(enum Endianness endian) + :offset_reader_(NULL), address_reader_(NULL), endian_(endian), + address_size_(0), offset_size_(0), + have_section_base_(), have_text_base_(), have_data_base_(), + have_function_base_() { } + +ByteReader::~ByteReader() { } + +void ByteReader::SetOffsetSize(uint8 size) { + offset_size_ = size; + assert(size == 4 || size == 8); + if (size == 4) { + this->offset_reader_ = &ByteReader::ReadFourBytes; + } else { + this->offset_reader_ = &ByteReader::ReadEightBytes; + } +} + +void ByteReader::SetAddressSize(uint8 size) { + address_size_ = size; + assert(size == 4 || size == 8); + if (size == 4) { + this->address_reader_ = &ByteReader::ReadFourBytes; + } else { + this->address_reader_ = &ByteReader::ReadEightBytes; + } +} + +uint64 ByteReader::ReadInitialLength(const char* start, size_t* len) { + const uint64 initial_length = ReadFourBytes(start); + start += 4; + + // In DWARF2/3, if the initial length is all 1 bits, then the offset + // size is 8 and we need to read the next 8 bytes for the real length. + if (initial_length == 0xffffffff) { + SetOffsetSize(8); + *len = 12; + return ReadOffset(start); + } else { + SetOffsetSize(4); + *len = 4; + } + return initial_length; +} + +bool ByteReader::ValidEncoding(DwarfPointerEncoding encoding) const { + if (encoding == DW_EH_PE_omit) return true; + if (encoding == DW_EH_PE_aligned) return true; + if ((encoding & 0x7) > DW_EH_PE_udata8) + return false; + if ((encoding & 0x70) > DW_EH_PE_funcrel) + return false; + return true; +} + +bool ByteReader::UsableEncoding(DwarfPointerEncoding encoding) const { + switch (encoding & 0x70) { + case DW_EH_PE_absptr: return true; + case DW_EH_PE_pcrel: return have_section_base_; + case DW_EH_PE_textrel: return have_text_base_; + case DW_EH_PE_datarel: return have_data_base_; + case DW_EH_PE_funcrel: return have_function_base_; + default: return false; + } +} + +uint64 ByteReader::ReadEncodedPointer(const char *buffer, + DwarfPointerEncoding encoding, + size_t *len) const { + // UsableEncoding doesn't approve of DW_EH_PE_omit, so we shouldn't + // see it here. + assert(encoding != DW_EH_PE_omit); + + // The Linux Standards Base 4.0 does not make this clear, but the + // GNU tools (gcc/unwind-pe.h; readelf/dwarf.c; gdb/dwarf2-frame.c) + // agree that aligned pointers are always absolute, machine-sized, + // machine-signed pointers. + if (encoding == DW_EH_PE_aligned) { + assert(have_section_base_); + + // We don't need to align BUFFER in *our* address space. Rather, we + // need to find the next position in our buffer that would be aligned + // when the .eh_frame section the buffer contains is loaded into the + // program's memory. So align assuming that buffer_base_ gets loaded at + // address section_base_, where section_base_ itself may or may not be + // aligned. + + // First, find the offset to START from the closest prior aligned + // address. + uint64 skew = section_base_ & (AddressSize() - 1); + // Now find the offset from that aligned address to buffer. + uint64 offset = skew + (buffer - buffer_base_); + // Round up to the next boundary. + uint64 aligned = (offset + AddressSize() - 1) & -AddressSize(); + // Convert back to a pointer. + const char *aligned_buffer = buffer_base_ + (aligned - skew); + // Finally, store the length and actually fetch the pointer. + *len = aligned_buffer - buffer + AddressSize(); + return ReadAddress(aligned_buffer); + } + + // Extract the value first, ignoring whether it's a pointer or an + // offset relative to some base. + uint64 offset; + switch (encoding & 0x0f) { + case DW_EH_PE_absptr: + // DW_EH_PE_absptr is weird, as it is used as a meaningful value for + // both the high and low nybble of encoding bytes. When it appears in + // the high nybble, it means that the pointer is absolute, not an + // offset from some base address. When it appears in the low nybble, + // as here, it means that the pointer is stored as a normal + // machine-sized and machine-signed address. A low nybble of + // DW_EH_PE_absptr does not imply that the pointer is absolute; it is + // correct for us to treat the value as an offset from a base address + // if the upper nybble is not DW_EH_PE_absptr. + offset = ReadAddress(buffer); + *len = AddressSize(); + break; + + case DW_EH_PE_uleb128: + offset = ReadUnsignedLEB128(buffer, len); + break; + + case DW_EH_PE_udata2: + offset = ReadTwoBytes(buffer); + *len = 2; + break; + + case DW_EH_PE_udata4: + offset = ReadFourBytes(buffer); + *len = 4; + break; + + case DW_EH_PE_udata8: + offset = ReadEightBytes(buffer); + *len = 8; + break; + + case DW_EH_PE_sleb128: + offset = ReadSignedLEB128(buffer, len); + break; + + case DW_EH_PE_sdata2: + offset = ReadTwoBytes(buffer); + // Sign-extend from 16 bits. + offset = (offset ^ 0x8000) - 0x8000; + *len = 2; + break; + + case DW_EH_PE_sdata4: + offset = ReadFourBytes(buffer); + // Sign-extend from 32 bits. + offset = (offset ^ 0x80000000ULL) - 0x80000000ULL; + *len = 4; + break; + + case DW_EH_PE_sdata8: + // No need to sign-extend; this is the full width of our type. + offset = ReadEightBytes(buffer); + *len = 8; + break; + + default: + abort(); + } + + // Find the appropriate base address. + uint64 base; + switch (encoding & 0x70) { + case DW_EH_PE_absptr: + base = 0; + break; + + case DW_EH_PE_pcrel: + assert(have_section_base_); + base = section_base_ + (buffer - buffer_base_); + break; + + case DW_EH_PE_textrel: + assert(have_text_base_); + base = text_base_; + break; + + case DW_EH_PE_datarel: + assert(have_data_base_); + base = data_base_; + break; + + case DW_EH_PE_funcrel: + assert(have_function_base_); + base = function_base_; + break; + + default: + abort(); + } + + uint64 pointer = base + offset; + + // Remove inappropriate upper bits. + if (AddressSize() == 4) + pointer = pointer & 0xffffffff; + else + assert(AddressSize() == sizeof(uint64)); + + return pointer; +} + +} // namespace dwarf2reader diff --git a/3rdParty/Breakpad/src/common/dwarf/bytereader.h b/3rdParty/Breakpad/src/common/dwarf/bytereader.h new file mode 100644 index 0000000..e389427 --- /dev/null +++ b/3rdParty/Breakpad/src/common/dwarf/bytereader.h @@ -0,0 +1,310 @@ +// -*- mode: C++ -*- + +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef COMMON_DWARF_BYTEREADER_H__ +#define COMMON_DWARF_BYTEREADER_H__ + +#include <string> +#include "common/dwarf/types.h" +#include "common/dwarf/dwarf2enums.h" + +namespace dwarf2reader { + +// We can't use the obvious name of LITTLE_ENDIAN and BIG_ENDIAN +// because it conflicts with a macro +enum Endianness { + ENDIANNESS_BIG, + ENDIANNESS_LITTLE +}; + +// A ByteReader knows how to read single- and multi-byte values of +// various endiannesses, sizes, and encodings, as used in DWARF +// debugging information and Linux C++ exception handling data. +class ByteReader { + public: + // Construct a ByteReader capable of reading one-, two-, four-, and + // eight-byte values according to ENDIANNESS, absolute machine-sized + // addresses, DWARF-style "initial length" values, signed and + // unsigned LEB128 numbers, and Linux C++ exception handling data's + // encoded pointers. + explicit ByteReader(enum Endianness endianness); + virtual ~ByteReader(); + + // Read a single byte from BUFFER and return it as an unsigned 8 bit + // number. + uint8 ReadOneByte(const char* buffer) const; + + // Read two bytes from BUFFER and return them as an unsigned 16 bit + // number, using this ByteReader's endianness. + uint16 ReadTwoBytes(const char* buffer) const; + + // Read four bytes from BUFFER and return them as an unsigned 32 bit + // number, using this ByteReader's endianness. This function returns + // a uint64 so that it is compatible with ReadAddress and + // ReadOffset. The number it returns will never be outside the range + // of an unsigned 32 bit integer. + uint64 ReadFourBytes(const char* buffer) const; + + // Read eight bytes from BUFFER and return them as an unsigned 64 + // bit number, using this ByteReader's endianness. + uint64 ReadEightBytes(const char* buffer) const; + + // Read an unsigned LEB128 (Little Endian Base 128) number from + // BUFFER and return it as an unsigned 64 bit integer. Set LEN to + // the number of bytes read. + // + // The unsigned LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between 0 and 0x7f, then its unsigned LEB128 + // representation is a single byte whose value is N. + // + // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | + // 0x80, followed by the unsigned LEB128 representation of N / + // 128, rounded towards negative infinity. + // + // In other words, we break VALUE into groups of seven bits, put + // them in little-endian order, and then write them as eight-bit + // bytes with the high bit on all but the last. + uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const; + + // Read a signed LEB128 number from BUFFER and return it as an + // signed 64 bit integer. Set LEN to the number of bytes read. + // + // The signed LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between -0x40 and 0x3f, then its signed LEB128 + // representation is a single byte whose value is N in two's + // complement. + // + // - Otherwise, its signed LEB128 representation is (N & 0x7f) | + // 0x80, followed by the signed LEB128 representation of N / 128, + // rounded towards negative infinity. + // + // In other words, we break VALUE into groups of seven bits, put + // them in little-endian order, and then write them as eight-bit + // bytes with the high bit on all but the last. + int64 ReadSignedLEB128(const char* buffer, size_t* len) const; + + // Indicate that addresses on this architecture are SIZE bytes long. SIZE + // must be either 4 or 8. (DWARF allows addresses to be any number of + // bytes in length from 1 to 255, but we only support 32- and 64-bit + // addresses at the moment.) You must call this before using the + // ReadAddress member function. + // + // For data in a .debug_info section, or something that .debug_info + // refers to like line number or macro data, the compilation unit + // header's address_size field indicates the address size to use. Call + // frame information doesn't indicate its address size (a shortcoming of + // the spec); you must supply the appropriate size based on the + // architecture of the target machine. + void SetAddressSize(uint8 size); + + // Return the current address size, in bytes. This is either 4, + // indicating 32-bit addresses, or 8, indicating 64-bit addresses. + uint8 AddressSize() const { return address_size_; } + + // Read an address from BUFFER and return it as an unsigned 64 bit + // integer, respecting this ByteReader's endianness and address size. You + // must call SetAddressSize before calling this function. + uint64 ReadAddress(const char* buffer) const; + + // DWARF actually defines two slightly different formats: 32-bit DWARF + // and 64-bit DWARF. This is *not* related to the size of registers or + // addresses on the target machine; it refers only to the size of section + // offsets and data lengths appearing in the DWARF data. One only needs + // 64-bit DWARF when the debugging data itself is larger than 4GiB. + // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the + // debugging data itself is very large. + // + // DWARF information identifies itself as 32-bit or 64-bit DWARF: each + // compilation unit and call frame information entry begins with an + // "initial length" field, which, in addition to giving the length of the + // data, also indicates the size of section offsets and lengths appearing + // in that data. The ReadInitialLength member function, below, reads an + // initial length and sets the ByteReader's offset size as a side effect. + // Thus, in the normal process of reading DWARF data, the appropriate + // offset size is set automatically. So, you should only need to call + // SetOffsetSize if you are using the same ByteReader to jump from the + // midst of one block of DWARF data into another. + + // Read a DWARF "initial length" field from START, and return it as + // an unsigned 64 bit integer, respecting this ByteReader's + // endianness. Set *LEN to the length of the initial length in + // bytes, either four or twelve. As a side effect, set this + // ByteReader's offset size to either 4 (if we see a 32-bit DWARF + // initial length) or 8 (if we see a 64-bit DWARF initial length). + // + // A DWARF initial length is either: + // + // - a byte count stored as an unsigned 32-bit value less than + // 0xffffff00, indicating that the data whose length is being + // measured uses the 32-bit DWARF format, or + // + // - The 32-bit value 0xffffffff, followed by a 64-bit byte count, + // indicating that the data whose length is being measured uses + // the 64-bit DWARF format. + uint64 ReadInitialLength(const char* start, size_t* len); + + // Read an offset from BUFFER and return it as an unsigned 64 bit + // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the + // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes + // long. You must call ReadInitialLength or SetOffsetSize before calling + // this function; see the comments above for details. + uint64 ReadOffset(const char* buffer) const; + + // Return the current offset size, in bytes. + // A return value of 4 indicates that we are reading 32-bit DWARF. + // A return value of 8 indicates that we are reading 64-bit DWARF. + uint8 OffsetSize() const { return offset_size_; } + + // Indicate that section offsets and lengths are SIZE bytes long. SIZE + // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF). + // Usually, you should not call this function yourself; instead, let a + // call to ReadInitialLength establish the data's offset size + // automatically. + void SetOffsetSize(uint8 size); + + // The Linux C++ ABI uses a variant of DWARF call frame information + // for exception handling. This data is included in the program's + // address space as the ".eh_frame" section, and intepreted at + // runtime to walk the stack, find exception handlers, and run + // cleanup code. The format is mostly the same as DWARF CFI, with + // some adjustments made to provide the additional + // exception-handling data, and to make the data easier to work with + // in memory --- for example, to allow it to be placed in read-only + // memory even when describing position-independent code. + // + // In particular, exception handling data can select a number of + // different encodings for pointers that appear in the data, as + // described by the DwarfPointerEncoding enum. There are actually + // four axes(!) to the encoding: + // + // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use + // the DWARF LEB128 encoding. + // + // - The pointer's signedness: pointers can be signed or unsigned. + // + // - The pointer's base address: the data stored in the exception + // handling data can be the actual address (that is, an absolute + // pointer), or relative to one of a number of different base + // addreses --- including that of the encoded pointer itself, for + // a form of "pc-relative" addressing. + // + // - The pointer may be indirect: it may be the address where the + // true pointer is stored. (This is used to refer to things via + // global offset table entries, program linkage table entries, or + // other tricks used in position-independent code.) + // + // There are also two options that fall outside that matrix + // altogether: the pointer may be omitted, or it may have padding to + // align it on an appropriate address boundary. (That last option + // may seem like it should be just another axis, but it is not.) + + // Indicate that the exception handling data is loaded starting at + // SECTION_BASE, and that the start of its buffer in our own memory + // is BUFFER_BASE. This allows us to find the address that a given + // byte in our buffer would have when loaded into the program the + // data describes. We need this to resolve DW_EH_PE_pcrel pointers. + void SetCFIDataBase(uint64 section_base, const char *buffer_base); + + // Indicate that the base address of the program's ".text" section + // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers. + void SetTextBase(uint64 text_base); + + // Indicate that the base address for DW_EH_PE_datarel pointers is + // DATA_BASE. The proper value depends on the ABI; it is usually the + // address of the global offset table, held in a designated register in + // position-independent code. You will need to look at the startup code + // for the target system to be sure. I tried; my eyes bled. + void SetDataBase(uint64 data_base); + + // Indicate that the base address for the FDE we are processing is + // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel + // pointers. (This encoding does not seem to be used by the GNU + // toolchain.) + void SetFunctionBase(uint64 function_base); + + // Indicate that we are no longer processing any FDE, so any use of + // a DW_EH_PE_funcrel encoding is an error. + void ClearFunctionBase(); + + // Return true if ENCODING is a valid pointer encoding. + bool ValidEncoding(DwarfPointerEncoding encoding) const; + + // Return true if we have all the information we need to read a + // pointer that uses ENCODING. This checks that the appropriate + // SetFooBase function for ENCODING has been called. + bool UsableEncoding(DwarfPointerEncoding encoding) const; + + // Read an encoded pointer from BUFFER using ENCODING; return the + // absolute address it represents, and set *LEN to the pointer's + // length in bytes, including any padding for aligned pointers. + // + // This function calls 'abort' if ENCODING is invalid or refers to a + // base address this reader hasn't been given, so you should check + // with ValidEncoding and UsableEncoding first if you would rather + // die in a more helpful way. + uint64 ReadEncodedPointer(const char *buffer, DwarfPointerEncoding encoding, + size_t *len) const; + + private: + + // Function pointer type for our address and offset readers. + typedef uint64 (ByteReader::*AddressReader)(const char*) const; + + // Read an offset from BUFFER and return it as an unsigned 64 bit + // integer. DWARF2/3 define offsets as either 4 or 8 bytes, + // generally depending on the amount of DWARF2/3 info present. + // This function pointer gets set by SetOffsetSize. + AddressReader offset_reader_; + + // Read an address from BUFFER and return it as an unsigned 64 bit + // integer. DWARF2/3 allow addresses to be any size from 0-255 + // bytes currently. Internally we support 4 and 8 byte addresses, + // and will CHECK on anything else. + // This function pointer gets set by SetAddressSize. + AddressReader address_reader_; + + Endianness endian_; + uint8 address_size_; + uint8 offset_size_; + + // Base addresses for Linux C++ exception handling data's encoded pointers. + bool have_section_base_, have_text_base_, have_data_base_; + bool have_function_base_; + uint64 section_base_, text_base_, data_base_, function_base_; + const char *buffer_base_; +}; + +} // namespace dwarf2reader + +#endif // COMMON_DWARF_BYTEREADER_H__ diff --git a/3rdParty/Breakpad/src/common/dwarf/dwarf2diehandler.cc b/3rdParty/Breakpad/src/common/dwarf/dwarf2diehandler.cc new file mode 100644 index 0000000..c741d69 --- /dev/null +++ b/3rdParty/Breakpad/src/common/dwarf/dwarf2diehandler.cc @@ -0,0 +1,199 @@ +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// dwarf2diehandler.cc: Implement the dwarf2reader::DieDispatcher class. +// See dwarf2diehandler.h for details. + +#include <assert.h> + +#include <string> + +#include "common/dwarf/dwarf2diehandler.h" +#include "common/using_std_string.h" + +namespace dwarf2reader { + +DIEDispatcher::~DIEDispatcher() { + while (!die_handlers_.empty()) { + HandlerStack &entry = die_handlers_.top(); + if (entry.handler_ != root_handler_) + delete entry.handler_; + die_handlers_.pop(); + } +} + +bool DIEDispatcher::StartCompilationUnit(uint64 offset, uint8 address_size, + uint8 offset_size, uint64 cu_length, + uint8 dwarf_version) { + return root_handler_->StartCompilationUnit(offset, address_size, + offset_size, cu_length, + dwarf_version); +} + +bool DIEDispatcher::StartDIE(uint64 offset, enum DwarfTag tag, + const AttributeList& attrs) { + // The stack entry for the parent of this DIE, if there is one. + HandlerStack *parent = die_handlers_.empty() ? NULL : &die_handlers_.top(); + + // Does this call indicate that we're done receiving the parent's + // attributes' values? If so, call its EndAttributes member function. + if (parent && parent->handler_ && !parent->reported_attributes_end_) { + parent->reported_attributes_end_ = true; + if (!parent->handler_->EndAttributes()) { + // Finish off this handler now. and edit *PARENT to indicate that + // we don't want to visit any of the children. + parent->handler_->Finish(); + if (parent->handler_ != root_handler_) + delete parent->handler_; + parent->handler_ = NULL; + return false; + } + } + + // Find a handler for this DIE. + DIEHandler *handler; + if (parent) { + if (parent->handler_) + // Ask the parent to find a handler. + handler = parent->handler_->FindChildHandler(offset, tag, attrs); + else + // No parent handler means we're not interested in any of our + // children. + handler = NULL; + } else { + // This is the root DIE. For a non-root DIE, the parent's handler + // decides whether to visit it, but the root DIE has no parent + // handler, so we have a special method on the root DIE handler + // itself to decide. + if (root_handler_->StartRootDIE(offset, tag, attrs)) + handler = root_handler_; + else + handler = NULL; + } + + // Push a handler stack entry for this new handler. As an + // optimization, we don't push NULL-handler entries on top of other + // NULL-handler entries; we just let the oldest such entry stand for + // the whole subtree. + if (handler || !parent || parent->handler_) { + HandlerStack entry; + entry.offset_ = offset; + entry.handler_ = handler; + entry.reported_attributes_end_ = false; + die_handlers_.push(entry); + } + + return handler != NULL; +} + +void DIEDispatcher::EndDIE(uint64 offset) { + assert(!die_handlers_.empty()); + HandlerStack *entry = &die_handlers_.top(); + if (entry->handler_) { + // This entry had better be the handler for this DIE. + assert(entry->offset_ == offset); + // If a DIE has no children, this EndDIE call indicates that we're + // done receiving its attributes' values. + if (!entry->reported_attributes_end_) + entry->handler_->EndAttributes(); // Ignore return value: no children. + entry->handler_->Finish(); + if (entry->handler_ != root_handler_) + delete entry->handler_; + } else { + // If this DIE is within a tree we're ignoring, then don't pop the + // handler stack: that entry stands for the whole tree. + if (entry->offset_ != offset) + return; + } + die_handlers_.pop(); +} + +void DIEDispatcher::ProcessAttributeUnsigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { + HandlerStack ¤t = die_handlers_.top(); + // This had better be an attribute of the DIE we were meant to handle. + assert(offset == current.offset_); + current.handler_->ProcessAttributeUnsigned(attr, form, data); +} + +void DIEDispatcher::ProcessAttributeSigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + int64 data) { + HandlerStack ¤t = die_handlers_.top(); + // This had better be an attribute of the DIE we were meant to handle. + assert(offset == current.offset_); + current.handler_->ProcessAttributeSigned(attr, form, data); +} + +void DIEDispatcher::ProcessAttributeReference(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { + HandlerStack ¤t = die_handlers_.top(); + // This had better be an attribute of the DIE we were meant to handle. + assert(offset == current.offset_); + current.handler_->ProcessAttributeReference(attr, form, data); +} + +void DIEDispatcher::ProcessAttributeBuffer(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const char* data, + uint64 len) { + HandlerStack ¤t = die_handlers_.top(); + // This had better be an attribute of the DIE we were meant to handle. + assert(offset == current.offset_); + current.handler_->ProcessAttributeBuffer(attr, form, data, len); +} + +void DIEDispatcher::ProcessAttributeString(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const string& data) { + HandlerStack ¤t = die_handlers_.top(); + // This had better be an attribute of the DIE we were meant to handle. + assert(offset == current.offset_); + current.handler_->ProcessAttributeString(attr, form, data); +} + +void DIEDispatcher::ProcessAttributeSignature(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 signature) { + HandlerStack ¤t = die_handlers_.top(); + // This had better be an attribute of the DIE we were meant to handle. + assert(offset == current.offset_); + current.handler_->ProcessAttributeSignature(attr, form, signature); +} + +} // namespace dwarf2reader diff --git a/3rdParty/Breakpad/src/common/dwarf/dwarf2diehandler.h b/3rdParty/Breakpad/src/common/dwarf/dwarf2diehandler.h new file mode 100644 index 0000000..12b8d3a --- /dev/null +++ b/3rdParty/Breakpad/src/common/dwarf/dwarf2diehandler.h @@ -0,0 +1,367 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// dwarf2reader::CompilationUnit is a simple and direct parser for +// DWARF data, but its handler interface is not convenient to use. In +// particular: +// +// - CompilationUnit calls Dwarf2Handler's member functions to report +// every attribute's value, regardless of what sort of DIE it is. +// As a result, the ProcessAttributeX functions end up looking like +// this: +// +// switch (parent_die_tag) { +// case DW_TAG_x: +// switch (attribute_name) { +// case DW_AT_y: +// handle attribute y of DIE type x +// ... +// } break; +// ... +// } +// +// In C++ it's much nicer to use virtual function dispatch to find +// the right code for a given case than to switch on the DIE tag +// like this. +// +// - Processing different kinds of DIEs requires different sets of +// data: lexical block DIEs have start and end addresses, but struct +// type DIEs don't. It would be nice to be able to have separate +// handler classes for separate kinds of DIEs, each with the members +// appropriate to its role, instead of having one handler class that +// needs to hold data for every DIE type. +// +// - There should be a separate instance of the appropriate handler +// class for each DIE, instead of a single object with tables +// tracking all the dies in the compilation unit. +// +// - It's not convenient to take some action after all a DIE's +// attributes have been seen, but before visiting any of its +// children. The only indication you have that a DIE's attribute +// list is complete is that you get either a StartDIE or an EndDIE +// call. +// +// - It's not convenient to make use of the tree structure of the +// DIEs. Skipping all the children of a given die requires +// maintaining state and returning false from StartDIE until we get +// an EndDIE call with the appropriate offset. +// +// This interface tries to take care of all that. (You're shocked, I'm sure.) +// +// Using the classes here, you provide an initial handler for the root +// DIE of the compilation unit. Each handler receives its DIE's +// attributes, and provides fresh handler objects for children of +// interest, if any. The three classes are: +// +// - DIEHandler: the base class for your DIE-type-specific handler +// classes. +// +// - RootDIEHandler: derived from DIEHandler, the base class for your +// root DIE handler class. +// +// - DIEDispatcher: derived from Dwarf2Handler, an instance of this +// invokes your DIE-type-specific handler objects. +// +// In detail: +// +// - Define handler classes specialized for the DIE types you're +// interested in. These handler classes must inherit from +// DIEHandler. Thus: +// +// class My_DW_TAG_X_Handler: public DIEHandler { ... }; +// class My_DW_TAG_Y_Handler: public DIEHandler { ... }; +// +// DIEHandler subclasses needn't correspond exactly to single DIE +// types, as shown here; the point is that you can have several +// different classes appropriate to different kinds of DIEs. +// +// - In particular, define a handler class for the compilation +// unit's root DIE, that inherits from RootDIEHandler: +// +// class My_DW_TAG_compile_unit_Handler: public RootDIEHandler { ... }; +// +// RootDIEHandler inherits from DIEHandler, adding a few additional +// member functions for examining the compilation unit as a whole, +// and other quirks of rootness. +// +// - Then, create a DIEDispatcher instance, passing it an instance of +// your root DIE handler class, and use that DIEDispatcher as the +// dwarf2reader::CompilationUnit's handler: +// +// My_DW_TAG_compile_unit_Handler root_die_handler(...); +// DIEDispatcher die_dispatcher(&root_die_handler); +// CompilationUnit reader(sections, offset, bytereader, &die_dispatcher); +// +// Here, 'die_dispatcher' acts as a shim between 'reader' and the +// various DIE-specific handlers you have defined. +// +// - When you call reader.Start(), die_dispatcher behaves as follows, +// starting with your root die handler and the compilation unit's +// root DIE: +// +// - It calls the handler's ProcessAttributeX member functions for +// each of the DIE's attributes. +// +// - It calls the handler's EndAttributes member function. This +// should return true if any of the DIE's children should be +// visited, in which case: +// +// - For each of the DIE's children, die_dispatcher calls the +// DIE's handler's FindChildHandler member function. If that +// returns a pointer to a DIEHandler instance, then +// die_dispatcher uses that handler to process the child, using +// this procedure recursively. Alternatively, if +// FindChildHandler returns NULL, die_dispatcher ignores that +// child and its descendants. +// +// - When die_dispatcher has finished processing all the DIE's +// children, it invokes the handler's Finish() member function, +// and destroys the handler. (As a special case, it doesn't +// destroy the root DIE handler.) +// +// This allows the code for handling a particular kind of DIE to be +// gathered together in a single class, makes it easy to skip all the +// children or individual children of a particular DIE, and provides +// appropriate parental context for each die. + +#ifndef COMMON_DWARF_DWARF2DIEHANDLER_H__ +#define COMMON_DWARF_DWARF2DIEHANDLER_H__ + +#include <stack> +#include <string> + +#include "common/dwarf/types.h" +#include "common/dwarf/dwarf2enums.h" +#include "common/dwarf/dwarf2reader.h" +#include "common/using_std_string.h" + +namespace dwarf2reader { + +// A base class for handlers for specific DIE types. The series of +// calls made on a DIE handler is as follows: +// +// - for each attribute of the DIE: +// - ProcessAttributeX() +// - EndAttributes() +// - if that returned true, then for each child: +// - FindChildHandler() +// - if that returns a non-NULL pointer to a new handler: +// - recurse, with the new handler and the child die +// - Finish() +// - destruction +class DIEHandler { + public: + DIEHandler() { } + virtual ~DIEHandler() { } + + // When we visit a DIE, we first use these member functions to + // report the DIE's attributes and their values. These have the + // same restrictions as the corresponding member functions of + // dwarf2reader::Dwarf2Handler. + // + // Since DWARF does not specify in what order attributes must + // appear, avoid making decisions in these functions that would be + // affected by the presence of other attributes. The EndAttributes + // function is a more appropriate place for such work, as all the + // DIE's attributes have been seen at that point. + // + // The default definitions ignore the values they are passed. + virtual void ProcessAttributeUnsigned(enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { } + virtual void ProcessAttributeSigned(enum DwarfAttribute attr, + enum DwarfForm form, + int64 data) { } + virtual void ProcessAttributeReference(enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { } + virtual void ProcessAttributeBuffer(enum DwarfAttribute attr, + enum DwarfForm form, + const char* data, + uint64 len) { } + virtual void ProcessAttributeString(enum DwarfAttribute attr, + enum DwarfForm form, + const string& data) { } + virtual void ProcessAttributeSignature(enum DwarfAttribute attr, + enum DwarfForm form, + uint64 signture) { } + + // Once we have reported all the DIE's attributes' values, we call + // this member function. If it returns false, we skip all the DIE's + // children. If it returns true, we call FindChildHandler on each + // child. If that returns a handler object, we use that to visit + // the child; otherwise, we skip the child. + // + // This is a good place to make decisions that depend on more than + // one attribute. DWARF does not specify in what order attributes + // must appear, so only when the EndAttributes function is called + // does the handler have a complete picture of the DIE's attributes. + // + // The default definition elects to ignore the DIE's children. + // You'll need to override this if you override FindChildHandler, + // but at least the default behavior isn't to pass the children to + // FindChildHandler, which then ignores them all. + virtual bool EndAttributes() { return false; } + + // If EndAttributes returns true to indicate that some of the DIE's + // children might be of interest, then we apply this function to + // each of the DIE's children. If it returns a handler object, then + // we use that to visit the child DIE. If it returns NULL, we skip + // that child DIE (and all its descendants). + // + // OFFSET is the offset of the child; TAG indicates what kind of DIE + // it is; and ATTRS is the list of attributes the DIE will have, and + // their forms (their values are not provided). + // + // The default definition skips all children. + virtual DIEHandler *FindChildHandler(uint64 offset, enum DwarfTag tag, + const AttributeList &attrs) { + return NULL; + } + + // When we are done processing a DIE, we call this member function. + // This happens after the EndAttributes call, all FindChildHandler + // calls (if any), and all operations on the children themselves (if + // any). We call Finish on every handler --- even if EndAttributes + // returns false. + virtual void Finish() { }; +}; + +// A subclass of DIEHandler, with additional kludges for handling the +// compilation unit's root die. +class RootDIEHandler: public DIEHandler { + public: + RootDIEHandler() { } + virtual ~RootDIEHandler() { } + + // We pass the values reported via Dwarf2Handler::StartCompilationUnit + // to this member function, and skip the entire compilation unit if it + // returns false. So the root DIE handler is actually also + // responsible for handling the compilation unit metadata. + // The default definition always visits the compilation unit. + virtual bool StartCompilationUnit(uint64 offset, uint8 address_size, + uint8 offset_size, uint64 cu_length, + uint8 dwarf_version) { return true; } + + // For the root DIE handler only, we pass the offset, tag and + // attributes of the compilation unit's root DIE. This is the only + // way the root DIE handler can find the root DIE's tag. If this + // function returns true, we will visit the root DIE using the usual + // DIEHandler methods; otherwise, we skip the entire compilation + // unit. + // + // The default definition elects to visit the root DIE. + virtual bool StartRootDIE(uint64 offset, enum DwarfTag tag, + const AttributeList& attrs) { return true; } +}; + +class DIEDispatcher: public Dwarf2Handler { + public: + // Create a Dwarf2Handler which uses ROOT_HANDLER as the handler for + // the compilation unit's root die, as described for the DIEHandler + // class. + DIEDispatcher(RootDIEHandler *root_handler) : root_handler_(root_handler) { } + // Destroying a DIEDispatcher destroys all active handler objects + // except the root handler. + ~DIEDispatcher(); + bool StartCompilationUnit(uint64 offset, uint8 address_size, + uint8 offset_size, uint64 cu_length, + uint8 dwarf_version); + bool StartDIE(uint64 offset, enum DwarfTag tag, + const AttributeList &attrs); + void ProcessAttributeUnsigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data); + void ProcessAttributeSigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + int64 data); + void ProcessAttributeReference(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data); + void ProcessAttributeBuffer(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const char* data, + uint64 len); + void ProcessAttributeString(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const string &data); + void ProcessAttributeSignature(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 signature); + void EndDIE(uint64 offset); + + private: + + // The type of a handler stack entry. This includes some fields + // which don't really need to be on the stack --- they could just be + // single data members of DIEDispatcher --- but putting them here + // makes it easier to see that the code is correct. + struct HandlerStack { + // The offset of the DIE for this handler stack entry. + uint64 offset_; + + // The handler object interested in this DIE's attributes and + // children. If NULL, we're not interested in either. + DIEHandler *handler_; + + // Have we reported the end of this DIE's attributes to the handler? + bool reported_attributes_end_; + }; + + // Stack of DIE attribute handlers. At StartDIE(D), the top of the + // stack is the handler of D's parent, whom we may ask for a handler + // for D itself. At EndDIE(D), the top of the stack is D's handler. + // Special cases: + // + // - Before we've seen the compilation unit's root DIE, the stack is + // empty; we'll call root_handler_'s special member functions, and + // perhaps push root_handler_ on the stack to look at the root's + // immediate children. + // + // - When we decide to ignore a subtree, we only push an entry on + // the stack for the root of the tree being ignored, rather than + // pushing lots of stack entries with handler_ set to NULL. + std::stack<HandlerStack> die_handlers_; + + // The root handler. We don't push it on die_handlers_ until we + // actually get the StartDIE call for the root. + RootDIEHandler *root_handler_; +}; + +} // namespace dwarf2reader +#endif // COMMON_DWARF_DWARF2DIEHANDLER_H__ diff --git a/3rdParty/Breakpad/src/common/dwarf/dwarf2enums.h b/3rdParty/Breakpad/src/common/dwarf/dwarf2enums.h new file mode 100644 index 0000000..5565d66 --- /dev/null +++ b/3rdParty/Breakpad/src/common/dwarf/dwarf2enums.h @@ -0,0 +1,650 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef COMMON_DWARF_DWARF2ENUMS_H__ +#define COMMON_DWARF_DWARF2ENUMS_H__ + +namespace dwarf2reader { + +// These enums do not follow the google3 style only because they are +// known universally (specs, other implementations) by the names in +// exactly this capitalization. +// Tag names and codes. +enum DwarfTag { + DW_TAG_padding = 0x00, + DW_TAG_array_type = 0x01, + DW_TAG_class_type = 0x02, + DW_TAG_entry_point = 0x03, + DW_TAG_enumeration_type = 0x04, + DW_TAG_formal_parameter = 0x05, + DW_TAG_imported_declaration = 0x08, + DW_TAG_label = 0x0a, + DW_TAG_lexical_block = 0x0b, + DW_TAG_member = 0x0d, + DW_TAG_pointer_type = 0x0f, + DW_TAG_reference_type = 0x10, + DW_TAG_compile_unit = 0x11, + DW_TAG_string_type = 0x12, + DW_TAG_structure_type = 0x13, + DW_TAG_subroutine_type = 0x15, + DW_TAG_typedef = 0x16, + DW_TAG_union_type = 0x17, + DW_TAG_unspecified_parameters = 0x18, + DW_TAG_variant = 0x19, + DW_TAG_common_block = 0x1a, + DW_TAG_common_inclusion = 0x1b, + DW_TAG_inheritance = 0x1c, + DW_TAG_inlined_subroutine = 0x1d, + DW_TAG_module = 0x1e, + DW_TAG_ptr_to_member_type = 0x1f, + DW_TAG_set_type = 0x20, + DW_TAG_subrange_type = 0x21, + DW_TAG_with_stmt = 0x22, + DW_TAG_access_declaration = 0x23, + DW_TAG_base_type = 0x24, + DW_TAG_catch_block = 0x25, + DW_TAG_const_type = 0x26, + DW_TAG_constant = 0x27, + DW_TAG_enumerator = 0x28, + DW_TAG_file_type = 0x29, + DW_TAG_friend = 0x2a, + DW_TAG_namelist = 0x2b, + DW_TAG_namelist_item = 0x2c, + DW_TAG_packed_type = 0x2d, + DW_TAG_subprogram = 0x2e, + DW_TAG_template_type_param = 0x2f, + DW_TAG_template_value_param = 0x30, + DW_TAG_thrown_type = 0x31, + DW_TAG_try_block = 0x32, + DW_TAG_variant_part = 0x33, + DW_TAG_variable = 0x34, + DW_TAG_volatile_type = 0x35, + // DWARF 3. + DW_TAG_dwarf_procedure = 0x36, + DW_TAG_restrict_type = 0x37, + DW_TAG_interface_type = 0x38, + DW_TAG_namespace = 0x39, + DW_TAG_imported_module = 0x3a, + DW_TAG_unspecified_type = 0x3b, + DW_TAG_partial_unit = 0x3c, + DW_TAG_imported_unit = 0x3d, + // SGI/MIPS Extensions. + DW_TAG_MIPS_loop = 0x4081, + // HP extensions. See: + // ftp://ftp.hp.com/pub/lang/tools/WDB/wdb-4.0.tar.gz + DW_TAG_HP_array_descriptor = 0x4090, + // GNU extensions. + DW_TAG_format_label = 0x4101, // For FORTRAN 77 and Fortran 90. + DW_TAG_function_template = 0x4102, // For C++. + DW_TAG_class_template = 0x4103, // For C++. + DW_TAG_GNU_BINCL = 0x4104, + DW_TAG_GNU_EINCL = 0x4105, + // Extensions for UPC. See: http://upc.gwu.edu/~upc. + DW_TAG_upc_shared_type = 0x8765, + DW_TAG_upc_strict_type = 0x8766, + DW_TAG_upc_relaxed_type = 0x8767, + // PGI (STMicroelectronics) extensions. No documentation available. + DW_TAG_PGI_kanji_type = 0xA000, + DW_TAG_PGI_interface_block = 0xA020 +}; + + +enum DwarfHasChild { + DW_children_no = 0, + DW_children_yes = 1 +}; + +// Form names and codes. +enum DwarfForm { + DW_FORM_addr = 0x01, + DW_FORM_block2 = 0x03, + DW_FORM_block4 = 0x04, + DW_FORM_data2 = 0x05, + DW_FORM_data4 = 0x06, + DW_FORM_data8 = 0x07, + DW_FORM_string = 0x08, + DW_FORM_block = 0x09, + DW_FORM_block1 = 0x0a, + DW_FORM_data1 = 0x0b, + DW_FORM_flag = 0x0c, + DW_FORM_sdata = 0x0d, + DW_FORM_strp = 0x0e, + DW_FORM_udata = 0x0f, + DW_FORM_ref_addr = 0x10, + DW_FORM_ref1 = 0x11, + DW_FORM_ref2 = 0x12, + DW_FORM_ref4 = 0x13, + DW_FORM_ref8 = 0x14, + DW_FORM_ref_udata = 0x15, + DW_FORM_indirect = 0x16, + + // Added in DWARF 4: + DW_FORM_sec_offset = 0x17, + DW_FORM_exprloc = 0x18, + DW_FORM_flag_present = 0x19, + DW_FORM_ref_sig8 = 0x20 +}; + +// Attribute names and codes +enum DwarfAttribute { + DW_AT_sibling = 0x01, + DW_AT_location = 0x02, + DW_AT_name = 0x03, + DW_AT_ordering = 0x09, + DW_AT_subscr_data = 0x0a, + DW_AT_byte_size = 0x0b, + DW_AT_bit_offset = 0x0c, + DW_AT_bit_size = 0x0d, + DW_AT_element_list = 0x0f, + DW_AT_stmt_list = 0x10, + DW_AT_low_pc = 0x11, + DW_AT_high_pc = 0x12, + DW_AT_language = 0x13, + DW_AT_member = 0x14, + DW_AT_discr = 0x15, + DW_AT_discr_value = 0x16, + DW_AT_visibility = 0x17, + DW_AT_import = 0x18, + DW_AT_string_length = 0x19, + DW_AT_common_reference = 0x1a, + DW_AT_comp_dir = 0x1b, + DW_AT_const_value = 0x1c, + DW_AT_containing_type = 0x1d, + DW_AT_default_value = 0x1e, + DW_AT_inline = 0x20, + DW_AT_is_optional = 0x21, + DW_AT_lower_bound = 0x22, + DW_AT_producer = 0x25, + DW_AT_prototyped = 0x27, + DW_AT_return_addr = 0x2a, + DW_AT_start_scope = 0x2c, + DW_AT_stride_size = 0x2e, + DW_AT_upper_bound = 0x2f, + DW_AT_abstract_origin = 0x31, + DW_AT_accessibility = 0x32, + DW_AT_address_class = 0x33, + DW_AT_artificial = 0x34, + DW_AT_base_types = 0x35, + DW_AT_calling_convention = 0x36, + DW_AT_count = 0x37, + DW_AT_data_member_location = 0x38, + DW_AT_decl_column = 0x39, + DW_AT_decl_file = 0x3a, + DW_AT_decl_line = 0x3b, + DW_AT_declaration = 0x3c, + DW_AT_discr_list = 0x3d, + DW_AT_encoding = 0x3e, + DW_AT_external = 0x3f, + DW_AT_frame_base = 0x40, + DW_AT_friend = 0x41, + DW_AT_identifier_case = 0x42, + DW_AT_macro_info = 0x43, + DW_AT_namelist_items = 0x44, + DW_AT_priority = 0x45, + DW_AT_segment = 0x46, + DW_AT_specification = 0x47, + DW_AT_static_link = 0x48, + DW_AT_type = 0x49, + DW_AT_use_location = 0x4a, + DW_AT_variable_parameter = 0x4b, + DW_AT_virtuality = 0x4c, + DW_AT_vtable_elem_location = 0x4d, + // DWARF 3 values. + DW_AT_allocated = 0x4e, + DW_AT_associated = 0x4f, + DW_AT_data_location = 0x50, + DW_AT_stride = 0x51, + DW_AT_entry_pc = 0x52, + DW_AT_use_UTF8 = 0x53, + DW_AT_extension = 0x54, + DW_AT_ranges = 0x55, + DW_AT_trampoline = 0x56, + DW_AT_call_column = 0x57, + DW_AT_call_file = 0x58, + DW_AT_call_line = 0x59, + // SGI/MIPS extensions. + DW_AT_MIPS_fde = 0x2001, + DW_AT_MIPS_loop_begin = 0x2002, + DW_AT_MIPS_tail_loop_begin = 0x2003, + DW_AT_MIPS_epilog_begin = 0x2004, + DW_AT_MIPS_loop_unroll_factor = 0x2005, + DW_AT_MIPS_software_pipeline_depth = 0x2006, + DW_AT_MIPS_linkage_name = 0x2007, + DW_AT_MIPS_stride = 0x2008, + DW_AT_MIPS_abstract_name = 0x2009, + DW_AT_MIPS_clone_origin = 0x200a, + DW_AT_MIPS_has_inlines = 0x200b, + // HP extensions. + DW_AT_HP_block_index = 0x2000, + DW_AT_HP_unmodifiable = 0x2001, // Same as DW_AT_MIPS_fde. + DW_AT_HP_actuals_stmt_list = 0x2010, + DW_AT_HP_proc_per_section = 0x2011, + DW_AT_HP_raw_data_ptr = 0x2012, + DW_AT_HP_pass_by_reference = 0x2013, + DW_AT_HP_opt_level = 0x2014, + DW_AT_HP_prof_version_id = 0x2015, + DW_AT_HP_opt_flags = 0x2016, + DW_AT_HP_cold_region_low_pc = 0x2017, + DW_AT_HP_cold_region_high_pc = 0x2018, + DW_AT_HP_all_variables_modifiable = 0x2019, + DW_AT_HP_linkage_name = 0x201a, + DW_AT_HP_prof_flags = 0x201b, // In comp unit of procs_info for -g. + // GNU extensions. + DW_AT_sf_names = 0x2101, + DW_AT_src_info = 0x2102, + DW_AT_mac_info = 0x2103, + DW_AT_src_coords = 0x2104, + DW_AT_body_begin = 0x2105, + DW_AT_body_end = 0x2106, + DW_AT_GNU_vector = 0x2107, + // VMS extensions. + DW_AT_VMS_rtnbeg_pd_address = 0x2201, + // UPC extension. + DW_AT_upc_threads_scaled = 0x3210, + // PGI (STMicroelectronics) extensions. + DW_AT_PGI_lbase = 0x3a00, + DW_AT_PGI_soffset = 0x3a01, + DW_AT_PGI_lstride = 0x3a02 +}; + + +// Line number opcodes. +enum DwarfLineNumberOps { + DW_LNS_extended_op = 0, + DW_LNS_copy = 1, + DW_LNS_advance_pc = 2, + DW_LNS_advance_line = 3, + DW_LNS_set_file = 4, + DW_LNS_set_column = 5, + DW_LNS_negate_stmt = 6, + DW_LNS_set_basic_block = 7, + DW_LNS_const_add_pc = 8, + DW_LNS_fixed_advance_pc = 9, + // DWARF 3. + DW_LNS_set_prologue_end = 10, + DW_LNS_set_epilogue_begin = 11, + DW_LNS_set_isa = 12 +}; + +// Line number extended opcodes. +enum DwarfLineNumberExtendedOps { + DW_LNE_end_sequence = 1, + DW_LNE_set_address = 2, + DW_LNE_define_file = 3, + // HP extensions. + DW_LNE_HP_negate_is_UV_update = 0x11, + DW_LNE_HP_push_context = 0x12, + DW_LNE_HP_pop_context = 0x13, + DW_LNE_HP_set_file_line_column = 0x14, + DW_LNE_HP_set_routine_name = 0x15, + DW_LNE_HP_set_sequence = 0x16, + DW_LNE_HP_negate_post_semantics = 0x17, + DW_LNE_HP_negate_function_exit = 0x18, + DW_LNE_HP_negate_front_end_logical = 0x19, + DW_LNE_HP_define_proc = 0x20 +}; + +// Type encoding names and codes +enum DwarfEncoding { + DW_ATE_address =0x1, + DW_ATE_boolean =0x2, + DW_ATE_complex_float =0x3, + DW_ATE_float =0x4, + DW_ATE_signed =0x5, + DW_ATE_signed_char =0x6, + DW_ATE_unsigned =0x7, + DW_ATE_unsigned_char =0x8, + // DWARF3/DWARF3f + DW_ATE_imaginary_float =0x9, + DW_ATE_packed_decimal =0xa, + DW_ATE_numeric_string =0xb, + DW_ATE_edited =0xc, + DW_ATE_signed_fixed =0xd, + DW_ATE_unsigned_fixed =0xe, + DW_ATE_decimal_float =0xf, + DW_ATE_lo_user =0x80, + DW_ATE_hi_user =0xff +}; + +// Location virtual machine opcodes +enum DwarfOpcode { + DW_OP_addr =0x03, + DW_OP_deref =0x06, + DW_OP_const1u =0x08, + DW_OP_const1s =0x09, + DW_OP_const2u =0x0a, + DW_OP_const2s =0x0b, + DW_OP_const4u =0x0c, + DW_OP_const4s =0x0d, + DW_OP_const8u =0x0e, + DW_OP_const8s =0x0f, + DW_OP_constu =0x10, + DW_OP_consts =0x11, + DW_OP_dup =0x12, + DW_OP_drop =0x13, + DW_OP_over =0x14, + DW_OP_pick =0x15, + DW_OP_swap =0x16, + DW_OP_rot =0x17, + DW_OP_xderef =0x18, + DW_OP_abs =0x19, + DW_OP_and =0x1a, + DW_OP_div =0x1b, + DW_OP_minus =0x1c, + DW_OP_mod =0x1d, + DW_OP_mul =0x1e, + DW_OP_neg =0x1f, + DW_OP_not =0x20, + DW_OP_or =0x21, + DW_OP_plus =0x22, + DW_OP_plus_uconst =0x23, + DW_OP_shl =0x24, + DW_OP_shr =0x25, + DW_OP_shra =0x26, + DW_OP_xor =0x27, + DW_OP_bra =0x28, + DW_OP_eq =0x29, + DW_OP_ge =0x2a, + DW_OP_gt =0x2b, + DW_OP_le =0x2c, + DW_OP_lt =0x2d, + DW_OP_ne =0x2e, + DW_OP_skip =0x2f, + DW_OP_lit0 =0x30, + DW_OP_lit1 =0x31, + DW_OP_lit2 =0x32, + DW_OP_lit3 =0x33, + DW_OP_lit4 =0x34, + DW_OP_lit5 =0x35, + DW_OP_lit6 =0x36, + DW_OP_lit7 =0x37, + DW_OP_lit8 =0x38, + DW_OP_lit9 =0x39, + DW_OP_lit10 =0x3a, + DW_OP_lit11 =0x3b, + DW_OP_lit12 =0x3c, + DW_OP_lit13 =0x3d, + DW_OP_lit14 =0x3e, + DW_OP_lit15 =0x3f, + DW_OP_lit16 =0x40, + DW_OP_lit17 =0x41, + DW_OP_lit18 =0x42, + DW_OP_lit19 =0x43, + DW_OP_lit20 =0x44, + DW_OP_lit21 =0x45, + DW_OP_lit22 =0x46, + DW_OP_lit23 =0x47, + DW_OP_lit24 =0x48, + DW_OP_lit25 =0x49, + DW_OP_lit26 =0x4a, + DW_OP_lit27 =0x4b, + DW_OP_lit28 =0x4c, + DW_OP_lit29 =0x4d, + DW_OP_lit30 =0x4e, + DW_OP_lit31 =0x4f, + DW_OP_reg0 =0x50, + DW_OP_reg1 =0x51, + DW_OP_reg2 =0x52, + DW_OP_reg3 =0x53, + DW_OP_reg4 =0x54, + DW_OP_reg5 =0x55, + DW_OP_reg6 =0x56, + DW_OP_reg7 =0x57, + DW_OP_reg8 =0x58, + DW_OP_reg9 =0x59, + DW_OP_reg10 =0x5a, + DW_OP_reg11 =0x5b, + DW_OP_reg12 =0x5c, + DW_OP_reg13 =0x5d, + DW_OP_reg14 =0x5e, + DW_OP_reg15 =0x5f, + DW_OP_reg16 =0x60, + DW_OP_reg17 =0x61, + DW_OP_reg18 =0x62, + DW_OP_reg19 =0x63, + DW_OP_reg20 =0x64, + DW_OP_reg21 =0x65, + DW_OP_reg22 =0x66, + DW_OP_reg23 =0x67, + DW_OP_reg24 =0x68, + DW_OP_reg25 =0x69, + DW_OP_reg26 =0x6a, + DW_OP_reg27 =0x6b, + DW_OP_reg28 =0x6c, + DW_OP_reg29 =0x6d, + DW_OP_reg30 =0x6e, + DW_OP_reg31 =0x6f, + DW_OP_breg0 =0x70, + DW_OP_breg1 =0x71, + DW_OP_breg2 =0x72, + DW_OP_breg3 =0x73, + DW_OP_breg4 =0x74, + DW_OP_breg5 =0x75, + DW_OP_breg6 =0x76, + DW_OP_breg7 =0x77, + DW_OP_breg8 =0x78, + DW_OP_breg9 =0x79, + DW_OP_breg10 =0x7a, + DW_OP_breg11 =0x7b, + DW_OP_breg12 =0x7c, + DW_OP_breg13 =0x7d, + DW_OP_breg14 =0x7e, + DW_OP_breg15 =0x7f, + DW_OP_breg16 =0x80, + DW_OP_breg17 =0x81, + DW_OP_breg18 =0x82, + DW_OP_breg19 =0x83, + DW_OP_breg20 =0x84, + DW_OP_breg21 =0x85, + DW_OP_breg22 =0x86, + DW_OP_breg23 =0x87, + DW_OP_breg24 =0x88, + DW_OP_breg25 =0x89, + DW_OP_breg26 =0x8a, + DW_OP_breg27 =0x8b, + DW_OP_breg28 =0x8c, + DW_OP_breg29 =0x8d, + DW_OP_breg30 =0x8e, + DW_OP_breg31 =0x8f, + DW_OP_regX =0x90, + DW_OP_fbreg =0x91, + DW_OP_bregX =0x92, + DW_OP_piece =0x93, + DW_OP_deref_size =0x94, + DW_OP_xderef_size =0x95, + DW_OP_nop =0x96, + // DWARF3/DWARF3f + DW_OP_push_object_address =0x97, + DW_OP_call2 =0x98, + DW_OP_call4 =0x99, + DW_OP_call_ref =0x9a, + DW_OP_form_tls_address =0x9b, + DW_OP_call_frame_cfa =0x9c, + DW_OP_bit_piece =0x9d, + DW_OP_lo_user =0xe0, + DW_OP_hi_user =0xff, + // GNU extensions + DW_OP_GNU_push_tls_address =0xe0 +}; + +// Source languages. These are values for DW_AT_language. +enum DwarfLanguage + { + DW_LANG_none =0x0000, + DW_LANG_C89 =0x0001, + DW_LANG_C =0x0002, + DW_LANG_Ada83 =0x0003, + DW_LANG_C_plus_plus =0x0004, + DW_LANG_Cobol74 =0x0005, + DW_LANG_Cobol85 =0x0006, + DW_LANG_Fortran77 =0x0007, + DW_LANG_Fortran90 =0x0008, + DW_LANG_Pascal83 =0x0009, + DW_LANG_Modula2 =0x000a, + DW_LANG_Java =0x000b, + DW_LANG_C99 =0x000c, + DW_LANG_Ada95 =0x000d, + DW_LANG_Fortran95 =0x000e, + DW_LANG_PLI =0x000f, + DW_LANG_ObjC =0x0010, + DW_LANG_ObjC_plus_plus =0x0011, + DW_LANG_UPC =0x0012, + DW_LANG_D =0x0013, + // Implementation-defined language code range. + DW_LANG_lo_user = 0x8000, + DW_LANG_hi_user = 0xffff, + + // Extensions. + + // MIPS assembly language. The GNU toolchain uses this for all + // assembly languages, since there's no generic DW_LANG_ value for that. + // See include/dwarf2.h in the binutils, gdb, or gcc source trees. + DW_LANG_Mips_Assembler =0x8001, + DW_LANG_Upc =0x8765 // Unified Parallel C + }; + +// Inline codes. These are values for DW_AT_inline. +enum DwarfInline { + DW_INL_not_inlined =0x0, + DW_INL_inlined =0x1, + DW_INL_declared_not_inlined =0x2, + DW_INL_declared_inlined =0x3 +}; + +// Call Frame Info instructions. +enum DwarfCFI + { + DW_CFA_advance_loc = 0x40, + DW_CFA_offset = 0x80, + DW_CFA_restore = 0xc0, + DW_CFA_nop = 0x00, + DW_CFA_set_loc = 0x01, + DW_CFA_advance_loc1 = 0x02, + DW_CFA_advance_loc2 = 0x03, + DW_CFA_advance_loc4 = 0x04, + DW_CFA_offset_extended = 0x05, + DW_CFA_restore_extended = 0x06, + DW_CFA_undefined = 0x07, + DW_CFA_same_value = 0x08, + DW_CFA_register = 0x09, + DW_CFA_remember_state = 0x0a, + DW_CFA_restore_state = 0x0b, + DW_CFA_def_cfa = 0x0c, + DW_CFA_def_cfa_register = 0x0d, + DW_CFA_def_cfa_offset = 0x0e, + DW_CFA_def_cfa_expression = 0x0f, + DW_CFA_expression = 0x10, + DW_CFA_offset_extended_sf = 0x11, + DW_CFA_def_cfa_sf = 0x12, + DW_CFA_def_cfa_offset_sf = 0x13, + DW_CFA_val_offset = 0x14, + DW_CFA_val_offset_sf = 0x15, + DW_CFA_val_expression = 0x16, + + // Opcodes in this range are reserved for user extensions. + DW_CFA_lo_user = 0x1c, + DW_CFA_hi_user = 0x3f, + + // SGI/MIPS specific. + DW_CFA_MIPS_advance_loc8 = 0x1d, + + // GNU extensions. + DW_CFA_GNU_window_save = 0x2d, + DW_CFA_GNU_args_size = 0x2e, + DW_CFA_GNU_negative_offset_extended = 0x2f + }; + +// Exception handling 'z' augmentation letters. +enum DwarfZAugmentationCodes { + // If the CFI augmentation string begins with 'z', then the CIE and FDE + // have an augmentation data area just before the instructions, whose + // contents are determined by the subsequent augmentation letters. + DW_Z_augmentation_start = 'z', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding, and the FDE + // augmentation data includes a language-specific data area pointer, + // represented using that encoding. + DW_Z_has_LSDA = 'L', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding, followed by a pointer + // to a personality routine, represented using that encoding. + DW_Z_has_personality_routine = 'P', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding describing how the FDE's + // initial location, address range, and DW_CFA_set_loc operands are + // encoded. + DW_Z_has_FDE_address_encoding = 'R', + + // If this letter is present in a 'z' augmentation string, then code + // addresses covered by FDEs that cite this CIE are signal delivery + // trampolines. Return addresses of frames in trampolines should not be + // adjusted as described in section 6.4.4 of the DWARF 3 spec. + DW_Z_is_signal_trampoline = 'S' +}; + +// Exception handling frame description pointer formats, as described +// by the Linux Standard Base Core Specification 4.0, section 11.5, +// DWARF Extensions. +enum DwarfPointerEncoding + { + DW_EH_PE_absptr = 0x00, + DW_EH_PE_omit = 0xff, + DW_EH_PE_uleb128 = 0x01, + DW_EH_PE_udata2 = 0x02, + DW_EH_PE_udata4 = 0x03, + DW_EH_PE_udata8 = 0x04, + DW_EH_PE_sleb128 = 0x09, + DW_EH_PE_sdata2 = 0x0A, + DW_EH_PE_sdata4 = 0x0B, + DW_EH_PE_sdata8 = 0x0C, + DW_EH_PE_pcrel = 0x10, + DW_EH_PE_textrel = 0x20, + DW_EH_PE_datarel = 0x30, + DW_EH_PE_funcrel = 0x40, + DW_EH_PE_aligned = 0x50, + + // The GNU toolchain sources define this enum value as well, + // simply to help classify the lower nybble values into signed and + // unsigned groups. + DW_EH_PE_signed = 0x08, + + // This is not documented in LSB 4.0, but it is used in both the + // Linux and OS X toolchains. It can be added to any other + // encoding (except DW_EH_PE_aligned), and indicates that the + // encoded value represents the address at which the true address + // is stored, not the true address itself. + DW_EH_PE_indirect = 0x80 + }; + +} // namespace dwarf2reader +#endif // COMMON_DWARF_DWARF2ENUMS_H__ diff --git a/3rdParty/Breakpad/src/common/dwarf/dwarf2reader.cc b/3rdParty/Breakpad/src/common/dwarf/dwarf2reader.cc new file mode 100644 index 0000000..7c1a29d --- /dev/null +++ b/3rdParty/Breakpad/src/common/dwarf/dwarf2reader.cc @@ -0,0 +1,2340 @@ +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit, +// and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details. + +#include "common/dwarf/dwarf2reader.h" + +#include <assert.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +#include <map> +#include <memory> +#include <stack> +#include <string> +#include <utility> + +#include "common/dwarf/bytereader-inl.h" +#include "common/dwarf/bytereader.h" +#include "common/dwarf/line_state_machine.h" +#include "common/using_std_string.h" + +namespace dwarf2reader { + +CompilationUnit::CompilationUnit(const SectionMap& sections, uint64 offset, + ByteReader* reader, Dwarf2Handler* handler) + : offset_from_section_start_(offset), reader_(reader), + sections_(sections), handler_(handler), abbrevs_(NULL), + string_buffer_(NULL), string_buffer_length_(0) {} + +// Read a DWARF2/3 abbreviation section. +// Each abbrev consists of a abbreviation number, a tag, a byte +// specifying whether the tag has children, and a list of +// attribute/form pairs. +// The list of forms is terminated by a 0 for the attribute, and a +// zero for the form. The entire abbreviation section is terminated +// by a zero for the code. + +void CompilationUnit::ReadAbbrevs() { + if (abbrevs_) + return; + + // First get the debug_abbrev section. ".debug_abbrev" is the name + // recommended in the DWARF spec, and used on Linux; + // "__debug_abbrev" is the name used in Mac OS X Mach-O files. + SectionMap::const_iterator iter = sections_.find(".debug_abbrev"); + if (iter == sections_.end()) + iter = sections_.find("__debug_abbrev"); + assert(iter != sections_.end()); + + abbrevs_ = new std::vector<Abbrev>; + abbrevs_->resize(1); + + // The only way to check whether we are reading over the end of the + // buffer would be to first compute the size of the leb128 data by + // reading it, then go back and read it again. + const char* abbrev_start = iter->second.first + + header_.abbrev_offset; + const char* abbrevptr = abbrev_start; +#ifndef NDEBUG + const uint64 abbrev_length = iter->second.second - header_.abbrev_offset; +#endif + + while (1) { + CompilationUnit::Abbrev abbrev; + size_t len; + const uint64 number = reader_->ReadUnsignedLEB128(abbrevptr, &len); + + if (number == 0) + break; + abbrev.number = number; + abbrevptr += len; + + assert(abbrevptr < abbrev_start + abbrev_length); + const uint64 tag = reader_->ReadUnsignedLEB128(abbrevptr, &len); + abbrevptr += len; + abbrev.tag = static_cast<enum DwarfTag>(tag); + + assert(abbrevptr < abbrev_start + abbrev_length); + abbrev.has_children = reader_->ReadOneByte(abbrevptr); + abbrevptr += 1; + + assert(abbrevptr < abbrev_start + abbrev_length); + + while (1) { + const uint64 nametemp = reader_->ReadUnsignedLEB128(abbrevptr, &len); + abbrevptr += len; + + assert(abbrevptr < abbrev_start + abbrev_length); + const uint64 formtemp = reader_->ReadUnsignedLEB128(abbrevptr, &len); + abbrevptr += len; + if (nametemp == 0 && formtemp == 0) + break; + + const enum DwarfAttribute name = + static_cast<enum DwarfAttribute>(nametemp); + const enum DwarfForm form = static_cast<enum DwarfForm>(formtemp); + abbrev.attributes.push_back(std::make_pair(name, form)); + } + assert(abbrev.number == abbrevs_->size()); + abbrevs_->push_back(abbrev); + } +} + +// Skips a single DIE's attributes. +const char* CompilationUnit::SkipDIE(const char* start, + const Abbrev& abbrev) { + for (AttributeList::const_iterator i = abbrev.attributes.begin(); + i != abbrev.attributes.end(); + i++) { + start = SkipAttribute(start, i->second); + } + return start; +} + +// Skips a single attribute form's data. +const char* CompilationUnit::SkipAttribute(const char* start, + enum DwarfForm form) { + size_t len; + + switch (form) { + case DW_FORM_indirect: + form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start, + &len)); + start += len; + return SkipAttribute(start, form); + + case DW_FORM_flag_present: + return start; + case DW_FORM_data1: + case DW_FORM_flag: + case DW_FORM_ref1: + return start + 1; + case DW_FORM_ref2: + case DW_FORM_data2: + return start + 2; + case DW_FORM_ref4: + case DW_FORM_data4: + return start + 4; + case DW_FORM_ref8: + case DW_FORM_data8: + case DW_FORM_ref_sig8: + return start + 8; + case DW_FORM_string: + return start + strlen(start) + 1; + case DW_FORM_udata: + case DW_FORM_ref_udata: + reader_->ReadUnsignedLEB128(start, &len); + return start + len; + + case DW_FORM_sdata: + reader_->ReadSignedLEB128(start, &len); + return start + len; + case DW_FORM_addr: + return start + reader_->AddressSize(); + case DW_FORM_ref_addr: + // DWARF2 and 3 differ on whether ref_addr is address size or + // offset size. + assert(header_.version == 2 || header_.version == 3); + if (header_.version == 2) { + return start + reader_->AddressSize(); + } else if (header_.version == 3) { + return start + reader_->OffsetSize(); + } + + case DW_FORM_block1: + return start + 1 + reader_->ReadOneByte(start); + case DW_FORM_block2: + return start + 2 + reader_->ReadTwoBytes(start); + case DW_FORM_block4: + return start + 4 + reader_->ReadFourBytes(start); + case DW_FORM_block: + case DW_FORM_exprloc: { + uint64 size = reader_->ReadUnsignedLEB128(start, &len); + return start + size + len; + } + case DW_FORM_strp: + case DW_FORM_sec_offset: + return start + reader_->OffsetSize(); + } + fprintf(stderr,"Unhandled form type"); + return NULL; +} + +// Read a DWARF2/3 header. +// The header is variable length in DWARF3 (and DWARF2 as extended by +// most compilers), and consists of an length field, a version number, +// the offset in the .debug_abbrev section for our abbrevs, and an +// address size. +void CompilationUnit::ReadHeader() { + const char* headerptr = buffer_; + size_t initial_length_size; + + assert(headerptr + 4 < buffer_ + buffer_length_); + const uint64 initial_length + = reader_->ReadInitialLength(headerptr, &initial_length_size); + headerptr += initial_length_size; + header_.length = initial_length; + + assert(headerptr + 2 < buffer_ + buffer_length_); + header_.version = reader_->ReadTwoBytes(headerptr); + headerptr += 2; + + assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_); + header_.abbrev_offset = reader_->ReadOffset(headerptr); + headerptr += reader_->OffsetSize(); + + assert(headerptr + 1 < buffer_ + buffer_length_); + header_.address_size = reader_->ReadOneByte(headerptr); + reader_->SetAddressSize(header_.address_size); + headerptr += 1; + + after_header_ = headerptr; + + // This check ensures that we don't have to do checking during the + // reading of DIEs. header_.length does not include the size of the + // initial length. + assert(buffer_ + initial_length_size + header_.length <= + buffer_ + buffer_length_); +} + +uint64 CompilationUnit::Start() { + // First get the debug_info section. ".debug_info" is the name + // recommended in the DWARF spec, and used on Linux; "__debug_info" + // is the name used in Mac OS X Mach-O files. + SectionMap::const_iterator iter = sections_.find(".debug_info"); + if (iter == sections_.end()) + iter = sections_.find("__debug_info"); + assert(iter != sections_.end()); + + // Set up our buffer + buffer_ = iter->second.first + offset_from_section_start_; + buffer_length_ = iter->second.second - offset_from_section_start_; + + // Read the header + ReadHeader(); + + // Figure out the real length from the end of the initial length to + // the end of the compilation unit, since that is the value we + // return. + uint64 ourlength = header_.length; + if (reader_->OffsetSize() == 8) + ourlength += 12; + else + ourlength += 4; + + // See if the user wants this compilation unit, and if not, just return. + if (!handler_->StartCompilationUnit(offset_from_section_start_, + reader_->AddressSize(), + reader_->OffsetSize(), + header_.length, + header_.version)) + return ourlength; + + // Otherwise, continue by reading our abbreviation entries. + ReadAbbrevs(); + + // Set the string section if we have one. ".debug_str" is the name + // recommended in the DWARF spec, and used on Linux; "__debug_str" + // is the name used in Mac OS X Mach-O files. + iter = sections_.find(".debug_str"); + if (iter == sections_.end()) + iter = sections_.find("__debug_str"); + if (iter != sections_.end()) { + string_buffer_ = iter->second.first; + string_buffer_length_ = iter->second.second; + } + + // Now that we have our abbreviations, start processing DIE's. + ProcessDIEs(); + + return ourlength; +} + +// If one really wanted, you could merge SkipAttribute and +// ProcessAttribute +// This is all boring data manipulation and calling of the handler. +const char* CompilationUnit::ProcessAttribute( + uint64 dieoffset, const char* start, enum DwarfAttribute attr, + enum DwarfForm form) { + size_t len; + + switch (form) { + // DW_FORM_indirect is never used because it is such a space + // waster. + case DW_FORM_indirect: + form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start, + &len)); + start += len; + return ProcessAttribute(dieoffset, start, attr, form); + + case DW_FORM_flag_present: + handler_->ProcessAttributeUnsigned(dieoffset, attr, form, 1); + return start; + case DW_FORM_data1: + case DW_FORM_flag: + handler_->ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadOneByte(start)); + return start + 1; + case DW_FORM_data2: + handler_->ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadTwoBytes(start)); + return start + 2; + case DW_FORM_data4: + handler_->ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadFourBytes(start)); + return start + 4; + case DW_FORM_data8: + handler_->ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadEightBytes(start)); + return start + 8; + case DW_FORM_string: { + const char* str = start; + handler_->ProcessAttributeString(dieoffset, attr, form, + str); + return start + strlen(str) + 1; + } + case DW_FORM_udata: + handler_->ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadUnsignedLEB128(start, + &len)); + return start + len; + + case DW_FORM_sdata: + handler_->ProcessAttributeSigned(dieoffset, attr, form, + reader_->ReadSignedLEB128(start, &len)); + return start + len; + case DW_FORM_addr: + handler_->ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadAddress(start)); + return start + reader_->AddressSize(); + case DW_FORM_sec_offset: + handler_->ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadOffset(start)); + return start + reader_->OffsetSize(); + + case DW_FORM_ref1: + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadOneByte(start) + + offset_from_section_start_); + return start + 1; + case DW_FORM_ref2: + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadTwoBytes(start) + + offset_from_section_start_); + return start + 2; + case DW_FORM_ref4: + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadFourBytes(start) + + offset_from_section_start_); + return start + 4; + case DW_FORM_ref8: + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadEightBytes(start) + + offset_from_section_start_); + return start + 8; + case DW_FORM_ref_udata: + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadUnsignedLEB128(start, + &len) + + offset_from_section_start_); + return start + len; + case DW_FORM_ref_addr: + // DWARF2 and 3 differ on whether ref_addr is address size or + // offset size. + assert(header_.version == 2 || header_.version == 3); + if (header_.version == 2) { + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadAddress(start)); + return start + reader_->AddressSize(); + } else if (header_.version == 3) { + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadOffset(start)); + return start + reader_->OffsetSize(); + } + break; + case DW_FORM_ref_sig8: + handler_->ProcessAttributeSignature(dieoffset, attr, form, + reader_->ReadEightBytes(start)); + return start + 8; + + case DW_FORM_block1: { + uint64 datalen = reader_->ReadOneByte(start); + handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 1, + datalen); + return start + 1 + datalen; + } + case DW_FORM_block2: { + uint64 datalen = reader_->ReadTwoBytes(start); + handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 2, + datalen); + return start + 2 + datalen; + } + case DW_FORM_block4: { + uint64 datalen = reader_->ReadFourBytes(start); + handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 4, + datalen); + return start + 4 + datalen; + } + case DW_FORM_block: + case DW_FORM_exprloc: { + uint64 datalen = reader_->ReadUnsignedLEB128(start, &len); + handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + len, + datalen); + return start + datalen + len; + } + case DW_FORM_strp: { + assert(string_buffer_ != NULL); + + const uint64 offset = reader_->ReadOffset(start); + assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_); + + const char* str = string_buffer_ + offset; + handler_->ProcessAttributeString(dieoffset, attr, form, + str); + return start + reader_->OffsetSize(); + } + } + fprintf(stderr, "Unhandled form type\n"); + return NULL; +} + +const char* CompilationUnit::ProcessDIE(uint64 dieoffset, + const char* start, + const Abbrev& abbrev) { + for (AttributeList::const_iterator i = abbrev.attributes.begin(); + i != abbrev.attributes.end(); + i++) { + start = ProcessAttribute(dieoffset, start, i->first, i->second); + } + return start; +} + +void CompilationUnit::ProcessDIEs() { + const char* dieptr = after_header_; + size_t len; + + // lengthstart is the place the length field is based on. + // It is the point in the header after the initial length field + const char* lengthstart = buffer_; + + // In 64 bit dwarf, the initial length is 12 bytes, because of the + // 0xffffffff at the start. + if (reader_->OffsetSize() == 8) + lengthstart += 12; + else + lengthstart += 4; + + std::stack<uint64> die_stack; + + while (dieptr < (lengthstart + header_.length)) { + // We give the user the absolute offset from the beginning of + // debug_info, since they need it to deal with ref_addr forms. + uint64 absolute_offset = (dieptr - buffer_) + offset_from_section_start_; + + uint64 abbrev_num = reader_->ReadUnsignedLEB128(dieptr, &len); + + dieptr += len; + + // Abbrev == 0 represents the end of a list of children, or padding + // at the end of the compilation unit. + if (abbrev_num == 0) { + if (die_stack.size() == 0) + // If it is padding, then we are done with the compilation unit's DIEs. + return; + const uint64 offset = die_stack.top(); + die_stack.pop(); + handler_->EndDIE(offset); + continue; + } + + const Abbrev& abbrev = abbrevs_->at(static_cast<size_t>(abbrev_num)); + const enum DwarfTag tag = abbrev.tag; + if (!handler_->StartDIE(absolute_offset, tag, abbrev.attributes)) { + dieptr = SkipDIE(dieptr, abbrev); + } else { + dieptr = ProcessDIE(absolute_offset, dieptr, abbrev); + } + + if (abbrev.has_children) { + die_stack.push(absolute_offset); + } else { + handler_->EndDIE(absolute_offset); + } + } +} + +LineInfo::LineInfo(const char* buffer, uint64 buffer_length, + ByteReader* reader, LineInfoHandler* handler): + handler_(handler), reader_(reader), buffer_(buffer), + buffer_length_(buffer_length) { + header_.std_opcode_lengths = NULL; +} + +uint64 LineInfo::Start() { + ReadHeader(); + ReadLines(); + return after_header_ - buffer_; +} + +// The header for a debug_line section is mildly complicated, because +// the line info is very tightly encoded. +void LineInfo::ReadHeader() { + const char* lineptr = buffer_; + size_t initial_length_size; + + const uint64 initial_length + = reader_->ReadInitialLength(lineptr, &initial_length_size); + + lineptr += initial_length_size; + header_.total_length = initial_length; + assert(buffer_ + initial_length_size + header_.total_length <= + buffer_ + buffer_length_); + + // Address size *must* be set by CU ahead of time. + assert(reader_->AddressSize() != 0); + + header_.version = reader_->ReadTwoBytes(lineptr); + lineptr += 2; + + header_.prologue_length = reader_->ReadOffset(lineptr); + lineptr += reader_->OffsetSize(); + + header_.min_insn_length = reader_->ReadOneByte(lineptr); + lineptr += 1; + + header_.default_is_stmt = reader_->ReadOneByte(lineptr); + lineptr += 1; + + header_.line_base = *reinterpret_cast<const int8*>(lineptr); + lineptr += 1; + + header_.line_range = reader_->ReadOneByte(lineptr); + lineptr += 1; + + header_.opcode_base = reader_->ReadOneByte(lineptr); + lineptr += 1; + + header_.std_opcode_lengths = new std::vector<unsigned char>; + header_.std_opcode_lengths->resize(header_.opcode_base + 1); + (*header_.std_opcode_lengths)[0] = 0; + for (int i = 1; i < header_.opcode_base; i++) { + (*header_.std_opcode_lengths)[i] = reader_->ReadOneByte(lineptr); + lineptr += 1; + } + + // It is legal for the directory entry table to be empty. + if (*lineptr) { + uint32 dirindex = 1; + while (*lineptr) { + const char* dirname = lineptr; + handler_->DefineDir(dirname, dirindex); + lineptr += strlen(dirname) + 1; + dirindex++; + } + } + lineptr++; + + // It is also legal for the file entry table to be empty. + if (*lineptr) { + uint32 fileindex = 1; + size_t len; + while (*lineptr) { + const char* filename = lineptr; + lineptr += strlen(filename) + 1; + + uint64 dirindex = reader_->ReadUnsignedLEB128(lineptr, &len); + lineptr += len; + + uint64 mod_time = reader_->ReadUnsignedLEB128(lineptr, &len); + lineptr += len; + + uint64 filelength = reader_->ReadUnsignedLEB128(lineptr, &len); + lineptr += len; + handler_->DefineFile(filename, fileindex, static_cast<uint32>(dirindex), + mod_time, filelength); + fileindex++; + } + } + lineptr++; + + after_header_ = lineptr; +} + +/* static */ +bool LineInfo::ProcessOneOpcode(ByteReader* reader, + LineInfoHandler* handler, + const struct LineInfoHeader &header, + const char* start, + struct LineStateMachine* lsm, + size_t* len, + uintptr pc, + bool *lsm_passes_pc) { + size_t oplen = 0; + size_t templen; + uint8 opcode = reader->ReadOneByte(start); + oplen++; + start++; + + // If the opcode is great than the opcode_base, it is a special + // opcode. Most line programs consist mainly of special opcodes. + if (opcode >= header.opcode_base) { + opcode -= header.opcode_base; + const int64 advance_address = (opcode / header.line_range) + * header.min_insn_length; + const int32 advance_line = (opcode % header.line_range) + + header.line_base; + + // Check if the lsm passes "pc". If so, mark it as passed. + if (lsm_passes_pc && + lsm->address <= pc && pc < lsm->address + advance_address) { + *lsm_passes_pc = true; + } + + lsm->address += advance_address; + lsm->line_num += advance_line; + lsm->basic_block = true; + *len = oplen; + return true; + } + + // Otherwise, we have the regular opcodes + switch (opcode) { + case DW_LNS_copy: { + lsm->basic_block = false; + *len = oplen; + return true; + } + + case DW_LNS_advance_pc: { + uint64 advance_address = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + + // Check if the lsm passes "pc". If so, mark it as passed. + if (lsm_passes_pc && lsm->address <= pc && + pc < lsm->address + header.min_insn_length * advance_address) { + *lsm_passes_pc = true; + } + + lsm->address += header.min_insn_length * advance_address; + } + break; + case DW_LNS_advance_line: { + const int64 advance_line = reader->ReadSignedLEB128(start, &templen); + oplen += templen; + lsm->line_num += static_cast<int32>(advance_line); + + // With gcc 4.2.1, we can get the line_no here for the first time + // since DW_LNS_advance_line is called after DW_LNE_set_address is + // called. So we check if the lsm passes "pc" here, not in + // DW_LNE_set_address. + if (lsm_passes_pc && lsm->address == pc) { + *lsm_passes_pc = true; + } + } + break; + case DW_LNS_set_file: { + const uint64 fileno = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + lsm->file_num = static_cast<uint32>(fileno); + } + break; + case DW_LNS_set_column: { + const uint64 colno = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + lsm->column_num = static_cast<uint32>(colno); + } + break; + case DW_LNS_negate_stmt: { + lsm->is_stmt = !lsm->is_stmt; + } + break; + case DW_LNS_set_basic_block: { + lsm->basic_block = true; + } + break; + case DW_LNS_fixed_advance_pc: { + const uint16 advance_address = reader->ReadTwoBytes(start); + oplen += 2; + + // Check if the lsm passes "pc". If so, mark it as passed. + if (lsm_passes_pc && + lsm->address <= pc && pc < lsm->address + advance_address) { + *lsm_passes_pc = true; + } + + lsm->address += advance_address; + } + break; + case DW_LNS_const_add_pc: { + const int64 advance_address = header.min_insn_length + * ((255 - header.opcode_base) + / header.line_range); + + // Check if the lsm passes "pc". If so, mark it as passed. + if (lsm_passes_pc && + lsm->address <= pc && pc < lsm->address + advance_address) { + *lsm_passes_pc = true; + } + + lsm->address += advance_address; + } + break; + case DW_LNS_extended_op: { + const uint64 extended_op_len = reader->ReadUnsignedLEB128(start, + &templen); + start += templen; + oplen += templen + extended_op_len; + + const uint64 extended_op = reader->ReadOneByte(start); + start++; + + switch (extended_op) { + case DW_LNE_end_sequence: { + lsm->end_sequence = true; + *len = oplen; + return true; + } + break; + case DW_LNE_set_address: { + // With gcc 4.2.1, we cannot tell the line_no here since + // DW_LNE_set_address is called before DW_LNS_advance_line is + // called. So we do not check if the lsm passes "pc" here. See + // also the comment in DW_LNS_advance_line. + uint64 address = reader->ReadAddress(start); + lsm->address = address; + } + break; + case DW_LNE_define_file: { + const char* filename = start; + + templen = strlen(filename) + 1; + start += templen; + + uint64 dirindex = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + + const uint64 mod_time = reader->ReadUnsignedLEB128(start, + &templen); + oplen += templen; + + const uint64 filelength = reader->ReadUnsignedLEB128(start, + &templen); + oplen += templen; + + if (handler) { + handler->DefineFile(filename, -1, static_cast<uint32>(dirindex), + mod_time, filelength); + } + } + break; + } + } + break; + + default: { + // Ignore unknown opcode silently + if (header.std_opcode_lengths) { + for (int i = 0; i < (*header.std_opcode_lengths)[opcode]; i++) { + reader->ReadUnsignedLEB128(start, &templen); + start += templen; + oplen += templen; + } + } + } + break; + } + *len = oplen; + return false; +} + +void LineInfo::ReadLines() { + struct LineStateMachine lsm; + + // lengthstart is the place the length field is based on. + // It is the point in the header after the initial length field + const char* lengthstart = buffer_; + + // In 64 bit dwarf, the initial length is 12 bytes, because of the + // 0xffffffff at the start. + if (reader_->OffsetSize() == 8) + lengthstart += 12; + else + lengthstart += 4; + + const char* lineptr = after_header_; + lsm.Reset(header_.default_is_stmt); + + // The LineInfoHandler interface expects each line's length along + // with its address, but DWARF only provides addresses (sans + // length), and an end-of-sequence address; one infers the length + // from the next address. So we report a line only when we get the + // next line's address, or the end-of-sequence address. + bool have_pending_line = false; + uint64 pending_address = 0; + uint32 pending_file_num = 0, pending_line_num = 0, pending_column_num = 0; + + while (lineptr < lengthstart + header_.total_length) { + size_t oplength; + bool add_row = ProcessOneOpcode(reader_, handler_, header_, + lineptr, &lsm, &oplength, (uintptr)-1, + NULL); + if (add_row) { + if (have_pending_line) + handler_->AddLine(pending_address, lsm.address - pending_address, + pending_file_num, pending_line_num, + pending_column_num); + if (lsm.end_sequence) { + lsm.Reset(header_.default_is_stmt); + have_pending_line = false; + } else { + pending_address = lsm.address; + pending_file_num = lsm.file_num; + pending_line_num = lsm.line_num; + pending_column_num = lsm.column_num; + have_pending_line = true; + } + } + lineptr += oplength; + } + + after_header_ = lengthstart + header_.total_length; +} + +// A DWARF rule for recovering the address or value of a register, or +// computing the canonical frame address. There is one subclass of this for +// each '*Rule' member function in CallFrameInfo::Handler. +// +// It's annoying that we have to handle Rules using pointers (because +// the concrete instances can have an arbitrary size). They're small, +// so it would be much nicer if we could just handle them by value +// instead of fretting about ownership and destruction. +// +// It seems like all these could simply be instances of std::tr1::bind, +// except that we need instances to be EqualityComparable, too. +// +// This could logically be nested within State, but then the qualified names +// get horrendous. +class CallFrameInfo::Rule { + public: + virtual ~Rule() { } + + // Tell HANDLER that, at ADDRESS in the program, REGISTER can be + // recovered using this rule. If REGISTER is kCFARegister, then this rule + // describes how to compute the canonical frame address. Return what the + // HANDLER member function returned. + virtual bool Handle(Handler *handler, + uint64 address, int register) const = 0; + + // Equality on rules. We use these to decide which rules we need + // to report after a DW_CFA_restore_state instruction. + virtual bool operator==(const Rule &rhs) const = 0; + + bool operator!=(const Rule &rhs) const { return ! (*this == rhs); } + + // Return a pointer to a copy of this rule. + virtual Rule *Copy() const = 0; + + // If this is a base+offset rule, change its base register to REG. + // Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.) + virtual void SetBaseRegister(unsigned reg) { } + + // If this is a base+offset rule, change its offset to OFFSET. Otherwise, + // do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.) + virtual void SetOffset(long long offset) { } +}; + +// Rule: the value the register had in the caller cannot be recovered. +class CallFrameInfo::UndefinedRule: public CallFrameInfo::Rule { + public: + UndefinedRule() { } + ~UndefinedRule() { } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->UndefinedRule(address, reg); + } + bool operator==(const Rule &rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const UndefinedRule *our_rhs = dynamic_cast<const UndefinedRule *>(&rhs); + return (our_rhs != NULL); + } + Rule *Copy() const { return new UndefinedRule(*this); } +}; + +// Rule: the register's value is the same as that it had in the caller. +class CallFrameInfo::SameValueRule: public CallFrameInfo::Rule { + public: + SameValueRule() { } + ~SameValueRule() { } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->SameValueRule(address, reg); + } + bool operator==(const Rule &rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const SameValueRule *our_rhs = dynamic_cast<const SameValueRule *>(&rhs); + return (our_rhs != NULL); + } + Rule *Copy() const { return new SameValueRule(*this); } +}; + +// Rule: the register is saved at OFFSET from BASE_REGISTER. BASE_REGISTER +// may be CallFrameInfo::Handler::kCFARegister. +class CallFrameInfo::OffsetRule: public CallFrameInfo::Rule { + public: + OffsetRule(int base_register, long offset) + : base_register_(base_register), offset_(offset) { } + ~OffsetRule() { } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->OffsetRule(address, reg, base_register_, offset_); + } + bool operator==(const Rule &rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const OffsetRule *our_rhs = dynamic_cast<const OffsetRule *>(&rhs); + return (our_rhs && + base_register_ == our_rhs->base_register_ && + offset_ == our_rhs->offset_); + } + Rule *Copy() const { return new OffsetRule(*this); } + // We don't actually need SetBaseRegister or SetOffset here, since they + // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it + // doesn't make sense to use OffsetRule for computing the CFA: it + // computes the address at which a register is saved, not a value. + private: + int base_register_; + long offset_; +}; + +// Rule: the value the register had in the caller is the value of +// BASE_REGISTER plus offset. BASE_REGISTER may be +// CallFrameInfo::Handler::kCFARegister. +class CallFrameInfo::ValOffsetRule: public CallFrameInfo::Rule { + public: + ValOffsetRule(int base_register, long offset) + : base_register_(base_register), offset_(offset) { } + ~ValOffsetRule() { } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->ValOffsetRule(address, reg, base_register_, offset_); + } + bool operator==(const Rule &rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const ValOffsetRule *our_rhs = dynamic_cast<const ValOffsetRule *>(&rhs); + return (our_rhs && + base_register_ == our_rhs->base_register_ && + offset_ == our_rhs->offset_); + } + Rule *Copy() const { return new ValOffsetRule(*this); } + void SetBaseRegister(unsigned reg) { base_register_ = reg; } + void SetOffset(long long offset) { offset_ = offset; } + private: + int base_register_; + long offset_; +}; + +// Rule: the register has been saved in another register REGISTER_NUMBER_. +class CallFrameInfo::RegisterRule: public CallFrameInfo::Rule { + public: + explicit RegisterRule(int register_number) + : register_number_(register_number) { } + ~RegisterRule() { } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->RegisterRule(address, reg, register_number_); + } + bool operator==(const Rule &rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const RegisterRule *our_rhs = dynamic_cast<const RegisterRule *>(&rhs); + return (our_rhs && register_number_ == our_rhs->register_number_); + } + Rule *Copy() const { return new RegisterRule(*this); } + private: + int register_number_; +}; + +// Rule: EXPRESSION evaluates to the address at which the register is saved. +class CallFrameInfo::ExpressionRule: public CallFrameInfo::Rule { + public: + explicit ExpressionRule(const string &expression) + : expression_(expression) { } + ~ExpressionRule() { } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->ExpressionRule(address, reg, expression_); + } + bool operator==(const Rule &rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const ExpressionRule *our_rhs = dynamic_cast<const ExpressionRule *>(&rhs); + return (our_rhs && expression_ == our_rhs->expression_); + } + Rule *Copy() const { return new ExpressionRule(*this); } + private: + string expression_; +}; + +// Rule: EXPRESSION evaluates to the address at which the register is saved. +class CallFrameInfo::ValExpressionRule: public CallFrameInfo::Rule { + public: + explicit ValExpressionRule(const string &expression) + : expression_(expression) { } + ~ValExpressionRule() { } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->ValExpressionRule(address, reg, expression_); + } + bool operator==(const Rule &rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const ValExpressionRule *our_rhs = + dynamic_cast<const ValExpressionRule *>(&rhs); + return (our_rhs && expression_ == our_rhs->expression_); + } + Rule *Copy() const { return new ValExpressionRule(*this); } + private: + string expression_; +}; + +// A map from register numbers to rules. +class CallFrameInfo::RuleMap { + public: + RuleMap() : cfa_rule_(NULL) { } + RuleMap(const RuleMap &rhs) : cfa_rule_(NULL) { *this = rhs; } + ~RuleMap() { Clear(); } + + RuleMap &operator=(const RuleMap &rhs); + + // Set the rule for computing the CFA to RULE. Take ownership of RULE. + void SetCFARule(Rule *rule) { delete cfa_rule_; cfa_rule_ = rule; } + + // Return the current CFA rule. Unlike RegisterRule, this RuleMap retains + // ownership of the rule. We use this for DW_CFA_def_cfa_offset and + // DW_CFA_def_cfa_register, and for detecting references to the CFA before + // a rule for it has been established. + Rule *CFARule() const { return cfa_rule_; } + + // Return the rule for REG, or NULL if there is none. The caller takes + // ownership of the result. + Rule *RegisterRule(int reg) const; + + // Set the rule for computing REG to RULE. Take ownership of RULE. + void SetRegisterRule(int reg, Rule *rule); + + // Make all the appropriate calls to HANDLER as if we were changing from + // this RuleMap to NEW_RULES at ADDRESS. We use this to implement + // DW_CFA_restore_state, where lots of rules can change simultaneously. + // Return true if all handlers returned true; otherwise, return false. + bool HandleTransitionTo(Handler *handler, uint64 address, + const RuleMap &new_rules) const; + + private: + // A map from register numbers to Rules. + typedef std::map<int, Rule *> RuleByNumber; + + // Remove all register rules and clear cfa_rule_. + void Clear(); + + // The rule for computing the canonical frame address. This RuleMap owns + // this rule. + Rule *cfa_rule_; + + // A map from register numbers to postfix expressions to recover + // their values. This RuleMap owns the Rules the map refers to. + RuleByNumber registers_; +}; + +CallFrameInfo::RuleMap &CallFrameInfo::RuleMap::operator=(const RuleMap &rhs) { + Clear(); + // Since each map owns the rules it refers to, assignment must copy them. + if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy(); + for (RuleByNumber::const_iterator it = rhs.registers_.begin(); + it != rhs.registers_.end(); it++) + registers_[it->first] = it->second->Copy(); + return *this; +} + +CallFrameInfo::Rule *CallFrameInfo::RuleMap::RegisterRule(int reg) const { + assert(reg != Handler::kCFARegister); + RuleByNumber::const_iterator it = registers_.find(reg); + if (it != registers_.end()) + return it->second->Copy(); + else + return NULL; +} + +void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule *rule) { + assert(reg != Handler::kCFARegister); + assert(rule); + Rule **slot = ®isters_[reg]; + delete *slot; + *slot = rule; +} + +bool CallFrameInfo::RuleMap::HandleTransitionTo( + Handler *handler, + uint64 address, + const RuleMap &new_rules) const { + // Transition from cfa_rule_ to new_rules.cfa_rule_. + if (cfa_rule_ && new_rules.cfa_rule_) { + if (*cfa_rule_ != *new_rules.cfa_rule_ && + !new_rules.cfa_rule_->Handle(handler, address, + Handler::kCFARegister)) + return false; + } else if (cfa_rule_) { + // this RuleMap has a CFA rule but new_rules doesn't. + // CallFrameInfo::Handler has no way to handle this --- and shouldn't; + // it's garbage input. The instruction interpreter should have + // detected this and warned, so take no action here. + } else if (new_rules.cfa_rule_) { + // This shouldn't be possible: NEW_RULES is some prior state, and + // there's no way to remove entries. + assert(0); + } else { + // Both CFA rules are empty. No action needed. + } + + // Traverse the two maps in order by register number, and report + // whatever differences we find. + RuleByNumber::const_iterator old_it = registers_.begin(); + RuleByNumber::const_iterator new_it = new_rules.registers_.begin(); + while (old_it != registers_.end() && new_it != new_rules.registers_.end()) { + if (old_it->first < new_it->first) { + // This RuleMap has an entry for old_it->first, but NEW_RULES + // doesn't. + // + // This isn't really the right thing to do, but since CFI generally + // only mentions callee-saves registers, and GCC's convention for + // callee-saves registers is that they are unchanged, it's a good + // approximation. + if (!handler->SameValueRule(address, old_it->first)) + return false; + old_it++; + } else if (old_it->first > new_it->first) { + // NEW_RULES has entry for new_it->first, but this RuleMap + // doesn't. This shouldn't be possible: NEW_RULES is some prior + // state, and there's no way to remove entries. + assert(0); + } else { + // Both maps have an entry for this register. Report the new + // rule if it is different. + if (*old_it->second != *new_it->second && + !new_it->second->Handle(handler, address, new_it->first)) + return false; + new_it++, old_it++; + } + } + // Finish off entries from this RuleMap with no counterparts in new_rules. + while (old_it != registers_.end()) { + if (!handler->SameValueRule(address, old_it->first)) + return false; + old_it++; + } + // Since we only make transitions from a rule set to some previously + // saved rule set, and we can only add rules to the map, NEW_RULES + // must have fewer rules than *this. + assert(new_it == new_rules.registers_.end()); + + return true; +} + +// Remove all register rules and clear cfa_rule_. +void CallFrameInfo::RuleMap::Clear() { + delete cfa_rule_; + cfa_rule_ = NULL; + for (RuleByNumber::iterator it = registers_.begin(); + it != registers_.end(); it++) + delete it->second; + registers_.clear(); +} + +// The state of the call frame information interpreter as it processes +// instructions from a CIE and FDE. +class CallFrameInfo::State { + public: + // Create a call frame information interpreter state with the given + // reporter, reader, handler, and initial call frame info address. + State(ByteReader *reader, Handler *handler, Reporter *reporter, + uint64 address) + : reader_(reader), handler_(handler), reporter_(reporter), + address_(address), entry_(NULL), cursor_(NULL) { } + + // Interpret instructions from CIE, save the resulting rule set for + // DW_CFA_restore instructions, and return true. On error, report + // the problem to reporter_ and return false. + bool InterpretCIE(const CIE &cie); + + // Interpret instructions from FDE, and return true. On error, + // report the problem to reporter_ and return false. + bool InterpretFDE(const FDE &fde); + + private: + // The operands of a CFI instruction, for ParseOperands. + struct Operands { + unsigned register_number; // A register number. + uint64 offset; // An offset or address. + long signed_offset; // A signed offset. + string expression; // A DWARF expression. + }; + + // Parse CFI instruction operands from STATE's instruction stream as + // described by FORMAT. On success, populate OPERANDS with the + // results, and return true. On failure, report the problem and + // return false. + // + // Each character of FORMAT should be one of the following: + // + // 'r' unsigned LEB128 register number (OPERANDS->register_number) + // 'o' unsigned LEB128 offset (OPERANDS->offset) + // 's' signed LEB128 offset (OPERANDS->signed_offset) + // 'a' machine-size address (OPERANDS->offset) + // (If the CIE has a 'z' augmentation string, 'a' uses the + // encoding specified by the 'R' argument.) + // '1' a one-byte offset (OPERANDS->offset) + // '2' a two-byte offset (OPERANDS->offset) + // '4' a four-byte offset (OPERANDS->offset) + // '8' an eight-byte offset (OPERANDS->offset) + // 'e' a DW_FORM_block holding a (OPERANDS->expression) + // DWARF expression + bool ParseOperands(const char *format, Operands *operands); + + // Interpret one CFI instruction from STATE's instruction stream, update + // STATE, report any rule changes to handler_, and return true. On + // failure, report the problem and return false. + bool DoInstruction(); + + // The following Do* member functions are subroutines of DoInstruction, + // factoring out the actual work of operations that have several + // different encodings. + + // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and + // return true. On failure, report and return false. (Used for + // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.) + bool DoDefCFA(unsigned base_register, long offset); + + // Change the offset of the CFA rule to OFFSET, and return true. On + // failure, report and return false. (Subroutine for + // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.) + bool DoDefCFAOffset(long offset); + + // Specify that REG can be recovered using RULE, and return true. On + // failure, report and return false. + bool DoRule(unsigned reg, Rule *rule); + + // Specify that REG can be found at OFFSET from the CFA, and return true. + // On failure, report and return false. (Subroutine for DW_CFA_offset, + // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.) + bool DoOffset(unsigned reg, long offset); + + // Specify that the caller's value for REG is the CFA plus OFFSET, + // and return true. On failure, report and return false. (Subroutine + // for DW_CFA_val_offset and DW_CFA_val_offset_sf.) + bool DoValOffset(unsigned reg, long offset); + + // Restore REG to the rule established in the CIE, and return true. On + // failure, report and return false. (Subroutine for DW_CFA_restore and + // DW_CFA_restore_extended.) + bool DoRestore(unsigned reg); + + // Return the section offset of the instruction at cursor. For use + // in error messages. + uint64 CursorOffset() { return entry_->offset + (cursor_ - entry_->start); } + + // Report that entry_ is incomplete, and return false. For brevity. + bool ReportIncomplete() { + reporter_->Incomplete(entry_->offset, entry_->kind); + return false; + } + + // For reading multi-byte values with the appropriate endianness. + ByteReader *reader_; + + // The handler to which we should report the data we find. + Handler *handler_; + + // For reporting problems in the info we're parsing. + Reporter *reporter_; + + // The code address to which the next instruction in the stream applies. + uint64 address_; + + // The entry whose instructions we are currently processing. This is + // first a CIE, and then an FDE. + const Entry *entry_; + + // The next instruction to process. + const char *cursor_; + + // The current set of rules. + RuleMap rules_; + + // The set of rules established by the CIE, used by DW_CFA_restore + // and DW_CFA_restore_extended. We set this after interpreting the + // CIE's instructions. + RuleMap cie_rules_; + + // A stack of saved states, for DW_CFA_remember_state and + // DW_CFA_restore_state. + std::stack<RuleMap> saved_rules_; +}; + +bool CallFrameInfo::State::InterpretCIE(const CIE &cie) { + entry_ = &cie; + cursor_ = entry_->instructions; + while (cursor_ < entry_->end) + if (!DoInstruction()) + return false; + // Note the rules established by the CIE, for use by DW_CFA_restore + // and DW_CFA_restore_extended. + cie_rules_ = rules_; + return true; +} + +bool CallFrameInfo::State::InterpretFDE(const FDE &fde) { + entry_ = &fde; + cursor_ = entry_->instructions; + while (cursor_ < entry_->end) + if (!DoInstruction()) + return false; + return true; +} + +bool CallFrameInfo::State::ParseOperands(const char *format, + Operands *operands) { + size_t len; + const char *operand; + + for (operand = format; *operand; operand++) { + size_t bytes_left = entry_->end - cursor_; + switch (*operand) { + case 'r': + operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 'o': + operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 's': + operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 'a': + operands->offset = + reader_->ReadEncodedPointer(cursor_, entry_->cie->pointer_encoding, + &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case '1': + if (1 > bytes_left) return ReportIncomplete(); + operands->offset = static_cast<unsigned char>(*cursor_++); + break; + + case '2': + if (2 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadTwoBytes(cursor_); + cursor_ += 2; + break; + + case '4': + if (4 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadFourBytes(cursor_); + cursor_ += 4; + break; + + case '8': + if (8 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadEightBytes(cursor_); + cursor_ += 8; + break; + + case 'e': { + size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left || expression_length > bytes_left - len) + return ReportIncomplete(); + cursor_ += len; + operands->expression = string(cursor_, expression_length); + cursor_ += expression_length; + break; + } + + default: + assert(0); + } + } + + return true; +} + +bool CallFrameInfo::State::DoInstruction() { + CIE *cie = entry_->cie; + Operands ops; + + // Our entry's kind should have been set by now. + assert(entry_->kind != kUnknown); + + // We shouldn't have been invoked unless there were more + // instructions to parse. + assert(cursor_ < entry_->end); + + unsigned opcode = *cursor_++; + if ((opcode & 0xc0) != 0) { + switch (opcode & 0xc0) { + // Advance the address. + case DW_CFA_advance_loc: { + size_t code_offset = opcode & 0x3f; + address_ += code_offset * cie->code_alignment_factor; + break; + } + + // Find a register at an offset from the CFA. + case DW_CFA_offset: + if (!ParseOperands("o", &ops) || + !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor)) + return false; + break; + + // Restore the rule established for a register by the CIE. + case DW_CFA_restore: + if (!DoRestore(opcode & 0x3f)) return false; + break; + + // The 'if' above should have excluded this possibility. + default: + assert(0); + } + + // Return here, so the big switch below won't be indented. + return true; + } + + switch (opcode) { + // Set the address. + case DW_CFA_set_loc: + if (!ParseOperands("a", &ops)) return false; + address_ = ops.offset; + break; + + // Advance the address. + case DW_CFA_advance_loc1: + if (!ParseOperands("1", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_advance_loc2: + if (!ParseOperands("2", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_advance_loc4: + if (!ParseOperands("4", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_MIPS_advance_loc8: + if (!ParseOperands("8", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Compute the CFA by adding an offset to a register. + case DW_CFA_def_cfa: + if (!ParseOperands("ro", &ops) || + !DoDefCFA(ops.register_number, ops.offset)) + return false; + break; + + // Compute the CFA by adding an offset to a register. + case DW_CFA_def_cfa_sf: + if (!ParseOperands("rs", &ops) || + !DoDefCFA(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // Change the base register used to compute the CFA. + case DW_CFA_def_cfa_register: { + Rule *cfa_rule = rules_.CFARule(); + if (!cfa_rule) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + if (!ParseOperands("r", &ops)) return false; + cfa_rule->SetBaseRegister(ops.register_number); + if (!cfa_rule->Handle(handler_, address_, + Handler::kCFARegister)) + return false; + break; + } + + // Change the offset used to compute the CFA. + case DW_CFA_def_cfa_offset: + if (!ParseOperands("o", &ops) || + !DoDefCFAOffset(ops.offset)) + return false; + break; + + // Change the offset used to compute the CFA. + case DW_CFA_def_cfa_offset_sf: + if (!ParseOperands("s", &ops) || + !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // Specify an expression whose value is the CFA. + case DW_CFA_def_cfa_expression: { + if (!ParseOperands("e", &ops)) + return false; + Rule *rule = new ValExpressionRule(ops.expression); + rules_.SetCFARule(rule); + if (!rule->Handle(handler_, address_, + Handler::kCFARegister)) + return false; + break; + } + + // The register's value cannot be recovered. + case DW_CFA_undefined: { + if (!ParseOperands("r", &ops) || + !DoRule(ops.register_number, new UndefinedRule())) + return false; + break; + } + + // The register's value is unchanged from its value in the caller. + case DW_CFA_same_value: { + if (!ParseOperands("r", &ops) || + !DoRule(ops.register_number, new SameValueRule())) + return false; + break; + } + + // Find a register at an offset from the CFA. + case DW_CFA_offset_extended: + if (!ParseOperands("ro", &ops) || + !DoOffset(ops.register_number, + ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register is saved at an offset from the CFA. + case DW_CFA_offset_extended_sf: + if (!ParseOperands("rs", &ops) || + !DoOffset(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // The register is saved at an offset from the CFA. + case DW_CFA_GNU_negative_offset_extended: + if (!ParseOperands("ro", &ops) || + !DoOffset(ops.register_number, + -ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register's value is the sum of the CFA plus an offset. + case DW_CFA_val_offset: + if (!ParseOperands("ro", &ops) || + !DoValOffset(ops.register_number, + ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register's value is the sum of the CFA plus an offset. + case DW_CFA_val_offset_sf: + if (!ParseOperands("rs", &ops) || + !DoValOffset(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // The register has been saved in another register. + case DW_CFA_register: { + if (!ParseOperands("ro", &ops) || + !DoRule(ops.register_number, new RegisterRule(ops.offset))) + return false; + break; + } + + // An expression yields the address at which the register is saved. + case DW_CFA_expression: { + if (!ParseOperands("re", &ops) || + !DoRule(ops.register_number, new ExpressionRule(ops.expression))) + return false; + break; + } + + // An expression yields the caller's value for the register. + case DW_CFA_val_expression: { + if (!ParseOperands("re", &ops) || + !DoRule(ops.register_number, new ValExpressionRule(ops.expression))) + return false; + break; + } + + // Restore the rule established for a register by the CIE. + case DW_CFA_restore_extended: + if (!ParseOperands("r", &ops) || + !DoRestore( ops.register_number)) + return false; + break; + + // Save the current set of rules on a stack. + case DW_CFA_remember_state: + saved_rules_.push(rules_); + break; + + // Pop the current set of rules off the stack. + case DW_CFA_restore_state: { + if (saved_rules_.empty()) { + reporter_->EmptyStateStack(entry_->offset, entry_->kind, + CursorOffset()); + return false; + } + const RuleMap &new_rules = saved_rules_.top(); + if (rules_.CFARule() && !new_rules.CFARule()) { + reporter_->ClearingCFARule(entry_->offset, entry_->kind, + CursorOffset()); + return false; + } + rules_.HandleTransitionTo(handler_, address_, new_rules); + rules_ = new_rules; + saved_rules_.pop(); + break; + } + + // No operation. (Padding instruction.) + case DW_CFA_nop: + break; + + // A SPARC register window save: Registers 8 through 15 (%o0-%o7) + // are saved in registers 24 through 31 (%i0-%i7), and registers + // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets + // (0-15 * the register size). The register numbers must be + // hard-coded. A GNU extension, and not a pretty one. + case DW_CFA_GNU_window_save: { + // Save %o0-%o7 in %i0-%i7. + for (int i = 8; i < 16; i++) + if (!DoRule(i, new RegisterRule(i + 16))) + return false; + // Save %l0-%l7 and %i0-%i7 at the CFA. + for (int i = 16; i < 32; i++) + // Assume that the byte reader's address size is the same as + // the architecture's register size. !@#%*^ hilarious. + if (!DoRule(i, new OffsetRule(Handler::kCFARegister, + (i - 16) * reader_->AddressSize()))) + return false; + break; + } + + // I'm not sure what this is. GDB doesn't use it for unwinding. + case DW_CFA_GNU_args_size: + if (!ParseOperands("o", &ops)) return false; + break; + + // An opcode we don't recognize. + default: { + reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + } + + return true; +} + +bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) { + Rule *rule = new ValOffsetRule(base_register, offset); + rules_.SetCFARule(rule); + return rule->Handle(handler_, address_, + Handler::kCFARegister); +} + +bool CallFrameInfo::State::DoDefCFAOffset(long offset) { + Rule *cfa_rule = rules_.CFARule(); + if (!cfa_rule) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + cfa_rule->SetOffset(offset); + return cfa_rule->Handle(handler_, address_, + Handler::kCFARegister); +} + +bool CallFrameInfo::State::DoRule(unsigned reg, Rule *rule) { + rules_.SetRegisterRule(reg, rule); + return rule->Handle(handler_, address_, reg); +} + +bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) { + if (!rules_.CFARule()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + return DoRule(reg, + new OffsetRule(Handler::kCFARegister, offset)); +} + +bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) { + if (!rules_.CFARule()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + return DoRule(reg, + new ValOffsetRule(Handler::kCFARegister, offset)); +} + +bool CallFrameInfo::State::DoRestore(unsigned reg) { + // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE. + if (entry_->kind == kCIE) { + reporter_->RestoreInCIE(entry_->offset, CursorOffset()); + return false; + } + Rule *rule = cie_rules_.RegisterRule(reg); + if (!rule) { + // This isn't really the right thing to do, but since CFI generally + // only mentions callee-saves registers, and GCC's convention for + // callee-saves registers is that they are unchanged, it's a good + // approximation. + rule = new SameValueRule(); + } + return DoRule(reg, rule); +} + +bool CallFrameInfo::ReadEntryPrologue(const char *cursor, Entry *entry) { + const char *buffer_end = buffer_ + buffer_length_; + + // Initialize enough of ENTRY for use in error reporting. + entry->offset = cursor - buffer_; + entry->start = cursor; + entry->kind = kUnknown; + entry->end = NULL; + + // Read the initial length. This sets reader_'s offset size. + size_t length_size; + uint64 length = reader_->ReadInitialLength(cursor, &length_size); + if (length_size > size_t(buffer_end - cursor)) + return ReportIncomplete(entry); + cursor += length_size; + + // In a .eh_frame section, a length of zero marks the end of the series + // of entries. + if (length == 0 && eh_frame_) { + entry->kind = kTerminator; + entry->end = cursor; + return true; + } + + // Validate the length. + if (length > size_t(buffer_end - cursor)) + return ReportIncomplete(entry); + + // The length is the number of bytes after the initial length field; + // we have that position handy at this point, so compute the end + // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine, + // and the length didn't fit in a size_t, we would have rejected it + // above.) + entry->end = cursor + length; + + // Parse the next field: either the offset of a CIE or a CIE id. + size_t offset_size = reader_->OffsetSize(); + if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry); + entry->id = reader_->ReadOffset(cursor); + + // Don't advance cursor past id field yet; in .eh_frame data we need + // the id's position to compute the section offset of an FDE's CIE. + + // Now we can decide what kind of entry this is. + if (eh_frame_) { + // In .eh_frame data, an ID of zero marks the entry as a CIE, and + // anything else is an offset from the id field of the FDE to the start + // of the CIE. + if (entry->id == 0) { + entry->kind = kCIE; + } else { + entry->kind = kFDE; + // Turn the offset from the id into an offset from the buffer's start. + entry->id = (cursor - buffer_) - entry->id; + } + } else { + // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the + // offset size for the entry) marks the entry as a CIE, and anything + // else is the offset of the CIE from the beginning of the section. + if (offset_size == 4) + entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE; + else { + assert(offset_size == 8); + entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE; + } + } + + // Now advance cursor past the id. + cursor += offset_size; + + // The fields specific to this kind of entry start here. + entry->fields = cursor; + + entry->cie = NULL; + + return true; +} + +bool CallFrameInfo::ReadCIEFields(CIE *cie) { + const char *cursor = cie->fields; + size_t len; + + assert(cie->kind == kCIE); + + // Prepare for early exit. + cie->version = 0; + cie->augmentation.clear(); + cie->code_alignment_factor = 0; + cie->data_alignment_factor = 0; + cie->return_address_register = 0; + cie->has_z_augmentation = false; + cie->pointer_encoding = DW_EH_PE_absptr; + cie->instructions = 0; + + // Parse the version number. + if (cie->end - cursor < 1) + return ReportIncomplete(cie); + cie->version = reader_->ReadOneByte(cursor); + cursor++; + + // If we don't recognize the version, we can't parse any more fields of the + // CIE. For DWARF CFI, we handle versions 1 through 3 (there was never a + // version 2 of CFI data). For .eh_frame, we handle versions 1 and 3 as well; + // the difference between those versions seems to be the same as for + // .debug_frame. + if (cie->version < 1 || cie->version > 3) { + reporter_->UnrecognizedVersion(cie->offset, cie->version); + return false; + } + + const char *augmentation_start = cursor; + const void *augmentation_end = + memchr(augmentation_start, '\0', cie->end - augmentation_start); + if (! augmentation_end) return ReportIncomplete(cie); + cursor = static_cast<const char *>(augmentation_end); + cie->augmentation = string(augmentation_start, + cursor - augmentation_start); + // Skip the terminating '\0'. + cursor++; + + // Is this CFI augmented? + if (!cie->augmentation.empty()) { + // Is it an augmentation we recognize? + if (cie->augmentation[0] == DW_Z_augmentation_start) { + // Linux C++ ABI 'z' augmentation, used for exception handling data. + cie->has_z_augmentation = true; + } else { + // Not an augmentation we recognize. Augmentations can have arbitrary + // effects on the form of rest of the content, so we have to give up. + reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); + return false; + } + } + + // Parse the code alignment factor. + cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + + // Parse the data alignment factor. + cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + + // Parse the return address register. This is a ubyte in version 1, and + // a ULEB128 in version 3. + if (cie->version == 1) { + if (cursor >= cie->end) return ReportIncomplete(cie); + cie->return_address_register = uint8(*cursor++); + } else { + cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + } + + // If we have a 'z' augmentation string, find the augmentation data and + // use the augmentation string to parse it. + if (cie->has_z_augmentation) { + uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len + data_size) + return ReportIncomplete(cie); + cursor += len; + const char *data = cursor; + cursor += data_size; + const char *data_end = cursor; + + cie->has_z_lsda = false; + cie->has_z_personality = false; + cie->has_z_signal_frame = false; + + // Walk the augmentation string, and extract values from the + // augmentation data as the string directs. + for (size_t i = 1; i < cie->augmentation.size(); i++) { + switch (cie->augmentation[i]) { + case DW_Z_has_LSDA: + // The CIE's augmentation data holds the language-specific data + // area pointer's encoding, and the FDE's augmentation data holds + // the pointer itself. + cie->has_z_lsda = true; + // Fetch the LSDA encoding from the augmentation data. + if (data >= data_end) return ReportIncomplete(cie); + cie->lsda_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->lsda_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding); + return false; + } + // Don't check if the encoding is usable here --- we haven't + // read the FDE's fields yet, so we're not prepared for + // DW_EH_PE_funcrel, although that's a fine encoding for the + // LSDA to use, since it appears in the FDE. + break; + + case DW_Z_has_personality_routine: + // The CIE's augmentation data holds the personality routine + // pointer's encoding, followed by the pointer itself. + cie->has_z_personality = true; + // Fetch the personality routine pointer's encoding from the + // augmentation data. + if (data >= data_end) return ReportIncomplete(cie); + cie->personality_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->personality_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, + cie->personality_encoding); + return false; + } + if (!reader_->UsableEncoding(cie->personality_encoding)) { + reporter_->UnusablePointerEncoding(cie->offset, + cie->personality_encoding); + return false; + } + // Fetch the personality routine's pointer itself from the data. + cie->personality_address = + reader_->ReadEncodedPointer(data, cie->personality_encoding, + &len); + if (len > size_t(data_end - data)) + return ReportIncomplete(cie); + data += len; + break; + + case DW_Z_has_FDE_address_encoding: + // The CIE's augmentation data holds the pointer encoding to use + // for addresses in the FDE. + if (data >= data_end) return ReportIncomplete(cie); + cie->pointer_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->pointer_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, + cie->pointer_encoding); + return false; + } + if (!reader_->UsableEncoding(cie->pointer_encoding)) { + reporter_->UnusablePointerEncoding(cie->offset, + cie->pointer_encoding); + return false; + } + break; + + case DW_Z_is_signal_trampoline: + // Frames using this CIE are signal delivery frames. + cie->has_z_signal_frame = true; + break; + + default: + // An augmentation we don't recognize. + reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); + return false; + } + } + } + + // The CIE's instructions start here. + cie->instructions = cursor; + + return true; +} + +bool CallFrameInfo::ReadFDEFields(FDE *fde) { + const char *cursor = fde->fields; + size_t size; + + fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding, + &size); + if (size > size_t(fde->end - cursor)) + return ReportIncomplete(fde); + cursor += size; + reader_->SetFunctionBase(fde->address); + + // For the length, we strip off the upper nybble of the encoding used for + // the starting address. + DwarfPointerEncoding length_encoding = + DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f); + fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size); + if (size > size_t(fde->end - cursor)) + return ReportIncomplete(fde); + cursor += size; + + // If the CIE has a 'z' augmentation string, then augmentation data + // appears here. + if (fde->cie->has_z_augmentation) { + uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size); + if (size_t(fde->end - cursor) < size + data_size) + return ReportIncomplete(fde); + cursor += size; + + // In the abstract, we should walk the augmentation string, and extract + // items from the FDE's augmentation data as we encounter augmentation + // string characters that specify their presence: the ordering of items + // in the augmentation string determines the arrangement of values in + // the augmentation data. + // + // In practice, there's only ever one value in FDE augmentation data + // that we support --- the LSDA pointer --- and we have to bail if we + // see any unrecognized augmentation string characters. So if there is + // anything here at all, we know what it is, and where it starts. + if (fde->cie->has_z_lsda) { + // Check whether the LSDA's pointer encoding is usable now: only once + // we've parsed the FDE's starting address do we call reader_-> + // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes + // usable. + if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) { + reporter_->UnusablePointerEncoding(fde->cie->offset, + fde->cie->lsda_encoding); + return false; + } + + fde->lsda_address = + reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size); + if (size > data_size) + return ReportIncomplete(fde); + // Ideally, we would also complain here if there were unconsumed + // augmentation data. + } + + cursor += data_size; + } + + // The FDE's instructions start after those. + fde->instructions = cursor; + + return true; +} + +bool CallFrameInfo::Start() { + const char *buffer_end = buffer_ + buffer_length_; + const char *cursor; + bool all_ok = true; + const char *entry_end; + bool ok; + + // Traverse all the entries in buffer_, skipping CIEs and offering + // FDEs to the handler. + for (cursor = buffer_; cursor < buffer_end; + cursor = entry_end, all_ok = all_ok && ok) { + FDE fde; + + // Make it easy to skip this entry with 'continue': assume that + // things are not okay until we've checked all the data, and + // prepare the address of the next entry. + ok = false; + + // Read the entry's prologue. + if (!ReadEntryPrologue(cursor, &fde)) { + if (!fde.end) { + // If we couldn't even figure out this entry's extent, then we + // must stop processing entries altogether. + all_ok = false; + break; + } + entry_end = fde.end; + continue; + } + + // The next iteration picks up after this entry. + entry_end = fde.end; + + // Did we see an .eh_frame terminating mark? + if (fde.kind == kTerminator) { + // If there appears to be more data left in the section after the + // terminating mark, warn the user. But this is just a warning; + // we leave all_ok true. + if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset); + break; + } + + // In this loop, we skip CIEs. We only parse them fully when we + // parse an FDE that refers to them. This limits our memory + // consumption (beyond the buffer itself) to that needed to + // process the largest single entry. + if (fde.kind != kFDE) { + ok = true; + continue; + } + + // Validate the CIE pointer. + if (fde.id > buffer_length_) { + reporter_->CIEPointerOutOfRange(fde.offset, fde.id); + continue; + } + + CIE cie; + + // Parse this FDE's CIE header. + if (!ReadEntryPrologue(buffer_ + fde.id, &cie)) + continue; + // This had better be an actual CIE. + if (cie.kind != kCIE) { + reporter_->BadCIEId(fde.offset, fde.id); + continue; + } + if (!ReadCIEFields(&cie)) + continue; + + // We now have the values that govern both the CIE and the FDE. + cie.cie = &cie; + fde.cie = &cie; + + // Parse the FDE's header. + if (!ReadFDEFields(&fde)) + continue; + + // Call Entry to ask the consumer if they're interested. + if (!handler_->Entry(fde.offset, fde.address, fde.size, + cie.version, cie.augmentation, + cie.return_address_register)) { + // The handler isn't interested in this entry. That's not an error. + ok = true; + continue; + } + + if (cie.has_z_augmentation) { + // Report the personality routine address, if we have one. + if (cie.has_z_personality) { + if (!handler_ + ->PersonalityRoutine(cie.personality_address, + IsIndirectEncoding(cie.personality_encoding))) + continue; + } + + // Report the language-specific data area address, if we have one. + if (cie.has_z_lsda) { + if (!handler_ + ->LanguageSpecificDataArea(fde.lsda_address, + IsIndirectEncoding(cie.lsda_encoding))) + continue; + } + + // If this is a signal-handling frame, report that. + if (cie.has_z_signal_frame) { + if (!handler_->SignalHandler()) + continue; + } + } + + // Interpret the CIE's instructions, and then the FDE's instructions. + State state(reader_, handler_, reporter_, fde.address); + ok = state.InterpretCIE(cie) && state.InterpretFDE(fde); + + // Tell the ByteReader that the function start address from the + // FDE header is no longer valid. + reader_->ClearFunctionBase(); + + // Report the end of the entry. + handler_->End(); + } + + return all_ok; +} + +const char *CallFrameInfo::KindName(EntryKind kind) { + if (kind == CallFrameInfo::kUnknown) + return "entry"; + else if (kind == CallFrameInfo::kCIE) + return "common information entry"; + else if (kind == CallFrameInfo::kFDE) + return "frame description entry"; + else { + assert (kind == CallFrameInfo::kTerminator); + return ".eh_frame sequence terminator"; + } +} + +bool CallFrameInfo::ReportIncomplete(Entry *entry) { + reporter_->Incomplete(entry->offset, entry->kind); + return false; +} + +void CallFrameInfo::Reporter::Incomplete(uint64 offset, + CallFrameInfo::EntryKind kind) { + fprintf(stderr, + "%s: CFI %s at offset 0x%llx in '%s': entry ends early\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str()); +} + +void CallFrameInfo::Reporter::EarlyEHTerminator(uint64 offset) { + fprintf(stderr, + "%s: CFI at offset 0x%llx in '%s': saw end-of-data marker" + " before end of section contents\n", + filename_.c_str(), offset, section_.c_str()); +} + +void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64 offset, + uint64 cie_offset) { + fprintf(stderr, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE pointer is out of range: 0x%llx\n", + filename_.c_str(), offset, section_.c_str(), cie_offset); +} + +void CallFrameInfo::Reporter::BadCIEId(uint64 offset, uint64 cie_offset) { + fprintf(stderr, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE pointer does not point to a CIE: 0x%llx\n", + filename_.c_str(), offset, section_.c_str(), cie_offset); +} + +void CallFrameInfo::Reporter::UnrecognizedVersion(uint64 offset, int version) { + fprintf(stderr, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE specifies unrecognized version: %d\n", + filename_.c_str(), offset, section_.c_str(), version); +} + +void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64 offset, + const string &aug) { + fprintf(stderr, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE specifies unrecognized augmentation: '%s'\n", + filename_.c_str(), offset, section_.c_str(), aug.c_str()); +} + +void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64 offset, + uint8 encoding) { + fprintf(stderr, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " 'z' augmentation specifies invalid pointer encoding: 0x%02x\n", + filename_.c_str(), offset, section_.c_str(), encoding); +} + +void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64 offset, + uint8 encoding) { + fprintf(stderr, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " 'z' augmentation specifies a pointer encoding for which" + " we have no base address: 0x%02x\n", + filename_.c_str(), offset, section_.c_str(), encoding); +} + +void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) { + fprintf(stderr, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " the DW_CFA_restore instruction at offset 0x%llx" + " cannot be used in a common information entry\n", + filename_.c_str(), offset, section_.c_str(), insn_offset); +} + +void CallFrameInfo::Reporter::BadInstruction(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + fprintf(stderr, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the instruction at offset 0x%llx is unrecognized\n", + filename_.c_str(), CallFrameInfo::KindName(kind), + offset, section_.c_str(), insn_offset); +} + +void CallFrameInfo::Reporter::NoCFARule(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + fprintf(stderr, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the instruction at offset 0x%llx assumes that a CFA rule has" + " been set, but none has been set\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); +} + +void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + fprintf(stderr, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the DW_CFA_restore_state instruction at offset 0x%llx" + " should pop a saved state from the stack, but the stack is empty\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); +} + +void CallFrameInfo::Reporter::ClearingCFARule(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + fprintf(stderr, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the DW_CFA_restore_state instruction at offset 0x%llx" + " would clear the CFA rule in effect\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); +} + +} // namespace dwarf2reader diff --git a/3rdParty/Breakpad/src/common/dwarf/dwarf2reader.h b/3rdParty/Breakpad/src/common/dwarf/dwarf2reader.h new file mode 100644 index 0000000..ecf4eb2 --- /dev/null +++ b/3rdParty/Breakpad/src/common/dwarf/dwarf2reader.h @@ -0,0 +1,1051 @@ +// -*- mode: C++ -*- + +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// This file contains definitions related to the DWARF2/3 reader and +// it's handler interfaces. +// The DWARF2/3 specification can be found at +// http://dwarf.freestandards.org and should be considered required +// reading if you wish to modify the implementation. +// Only a cursory attempt is made to explain terminology that is +// used here, as it is much better explained in the standard documents +#ifndef COMMON_DWARF_DWARF2READER_H__ +#define COMMON_DWARF_DWARF2READER_H__ + +#include <list> +#include <map> +#include <string> +#include <utility> +#include <vector> + +#include "common/dwarf/bytereader.h" +#include "common/dwarf/dwarf2enums.h" +#include "common/dwarf/types.h" +#include "common/using_std_string.h" + +namespace dwarf2reader { +struct LineStateMachine; +class Dwarf2Handler; +class LineInfoHandler; + +// This maps from a string naming a section to a pair containing a +// the data for the section, and the size of the section. +typedef std::map<string, std::pair<const char*, uint64> > SectionMap; +typedef std::list<std::pair<enum DwarfAttribute, enum DwarfForm> > + AttributeList; +typedef AttributeList::iterator AttributeIterator; +typedef AttributeList::const_iterator ConstAttributeIterator; + +struct LineInfoHeader { + uint64 total_length; + uint16 version; + uint64 prologue_length; + uint8 min_insn_length; // insn stands for instructin + bool default_is_stmt; // stmt stands for statement + int8 line_base; + uint8 line_range; + uint8 opcode_base; + // Use a pointer so that signalsafe_addr2line is able to use this structure + // without heap allocation problem. + std::vector<unsigned char> *std_opcode_lengths; +}; + +class LineInfo { + public: + + // Initializes a .debug_line reader. Buffer and buffer length point + // to the beginning and length of the line information to read. + // Reader is a ByteReader class that has the endianness set + // properly. + LineInfo(const char* buffer_, uint64 buffer_length, + ByteReader* reader, LineInfoHandler* handler); + + virtual ~LineInfo() { + if (header_.std_opcode_lengths) { + delete header_.std_opcode_lengths; + } + } + + // Start processing line info, and calling callbacks in the handler. + // Consumes the line number information for a single compilation unit. + // Returns the number of bytes processed. + uint64 Start(); + + // Process a single line info opcode at START using the state + // machine at LSM. Return true if we should define a line using the + // current state of the line state machine. Place the length of the + // opcode in LEN. + // If LSM_PASSES_PC is non-NULL, this function also checks if the lsm + // passes the address of PC. In other words, LSM_PASSES_PC will be + // set to true, if the following condition is met. + // + // lsm's old address < PC <= lsm's new address + static bool ProcessOneOpcode(ByteReader* reader, + LineInfoHandler* handler, + const struct LineInfoHeader &header, + const char* start, + struct LineStateMachine* lsm, + size_t* len, + uintptr pc, + bool *lsm_passes_pc); + + private: + // Reads the DWARF2/3 header for this line info. + void ReadHeader(); + + // Reads the DWARF2/3 line information + void ReadLines(); + + // The associated handler to call processing functions in + LineInfoHandler* handler_; + + // The associated ByteReader that handles endianness issues for us + ByteReader* reader_; + + // A DWARF2/3 line info header. This is not the same size as + // in the actual file, as the one in the file may have a 32 bit or + // 64 bit lengths + + struct LineInfoHeader header_; + + // buffer is the buffer for our line info, starting at exactly where + // the line info to read is. after_header is the place right after + // the end of the line information header. + const char* buffer_; + uint64 buffer_length_; + const char* after_header_; +}; + +// This class is the main interface between the line info reader and +// the client. The virtual functions inside this get called for +// interesting events that happen during line info reading. The +// default implementation does nothing + +class LineInfoHandler { + public: + LineInfoHandler() { } + + virtual ~LineInfoHandler() { } + + // Called when we define a directory. NAME is the directory name, + // DIR_NUM is the directory number + virtual void DefineDir(const string& name, uint32 dir_num) { } + + // Called when we define a filename. NAME is the filename, FILE_NUM + // is the file number which is -1 if the file index is the next + // index after the last numbered index (this happens when files are + // dynamically defined by the line program), DIR_NUM is the + // directory index for the directory name of this file, MOD_TIME is + // the modification time of the file, and LENGTH is the length of + // the file + virtual void DefineFile(const string& name, int32 file_num, + uint32 dir_num, uint64 mod_time, + uint64 length) { } + + // Called when the line info reader has a new line, address pair + // ready for us. ADDRESS is the address of the code, LENGTH is the + // length of its machine code in bytes, FILE_NUM is the file number + // containing the code, LINE_NUM is the line number in that file for + // the code, and COLUMN_NUM is the column number the code starts at, + // if we know it (0 otherwise). + virtual void AddLine(uint64 address, uint64 length, + uint32 file_num, uint32 line_num, uint32 column_num) { } +}; + +// The base of DWARF2/3 debug info is a DIE (Debugging Information +// Entry. +// DWARF groups DIE's into a tree and calls the root of this tree a +// "compilation unit". Most of the time, there is one compilation +// unit in the .debug_info section for each file that had debug info +// generated. +// Each DIE consists of + +// 1. a tag specifying a thing that is being described (ie +// DW_TAG_subprogram for functions, DW_TAG_variable for variables, etc +// 2. attributes (such as DW_AT_location for location in memory, +// DW_AT_name for name), and data for each attribute. +// 3. A flag saying whether the DIE has children or not + +// In order to gain some amount of compression, the format of +// each DIE (tag name, attributes and data forms for the attributes) +// are stored in a separate table called the "abbreviation table". +// This is done because a large number of DIEs have the exact same tag +// and list of attributes, but different data for those attributes. +// As a result, the .debug_info section is just a stream of data, and +// requires reading of the .debug_abbrev section to say what the data +// means. + +// As a warning to the user, it should be noted that the reason for +// using absolute offsets from the beginning of .debug_info is that +// DWARF2/3 supports referencing DIE's from other DIE's by their offset +// from either the current compilation unit start, *or* the beginning +// of the .debug_info section. This means it is possible to reference +// a DIE in one compilation unit from a DIE in another compilation +// unit. This style of reference is usually used to eliminate +// duplicated information that occurs across compilation +// units, such as base types, etc. GCC 3.4+ support this with +// -feliminate-dwarf2-dups. Other toolchains will sometimes do +// duplicate elimination in the linker. + +class CompilationUnit { + public: + + // Initialize a compilation unit. This requires a map of sections, + // the offset of this compilation unit in the .debug_info section, a + // ByteReader, and a Dwarf2Handler class to call callbacks in. + CompilationUnit(const SectionMap& sections, uint64 offset, + ByteReader* reader, Dwarf2Handler* handler); + virtual ~CompilationUnit() { + if (abbrevs_) delete abbrevs_; + } + + // Begin reading a Dwarf2 compilation unit, and calling the + // callbacks in the Dwarf2Handler + + // Return the full length of the compilation unit, including + // headers. This plus the starting offset passed to the constructor + // is the offset of the end of the compilation unit --- and the + // start of the next compilation unit, if there is one. + uint64 Start(); + + private: + + // This struct represents a single DWARF2/3 abbreviation + // The abbreviation tells how to read a DWARF2/3 DIE, and consist of a + // tag and a list of attributes, as well as the data form of each attribute. + struct Abbrev { + uint64 number; + enum DwarfTag tag; + bool has_children; + AttributeList attributes; + }; + + // A DWARF2/3 compilation unit header. This is not the same size as + // in the actual file, as the one in the file may have a 32 bit or + // 64 bit length. + struct CompilationUnitHeader { + uint64 length; + uint16 version; + uint64 abbrev_offset; + uint8 address_size; + } header_; + + // Reads the DWARF2/3 header for this compilation unit. + void ReadHeader(); + + // Reads the DWARF2/3 abbreviations for this compilation unit + void ReadAbbrevs(); + + // Processes a single DIE for this compilation unit and return a new + // pointer just past the end of it + const char* ProcessDIE(uint64 dieoffset, + const char* start, + const Abbrev& abbrev); + + // Processes a single attribute and return a new pointer just past the + // end of it + const char* ProcessAttribute(uint64 dieoffset, + const char* start, + enum DwarfAttribute attr, + enum DwarfForm form); + + // Processes all DIEs for this compilation unit + void ProcessDIEs(); + + // Skips the die with attributes specified in ABBREV starting at + // START, and return the new place to position the stream to. + const char* SkipDIE(const char* start, + const Abbrev& abbrev); + + // Skips the attribute starting at START, with FORM, and return the + // new place to position the stream to. + const char* SkipAttribute(const char* start, + enum DwarfForm form); + + // Offset from section start is the offset of this compilation unit + // from the beginning of the .debug_info section. + uint64 offset_from_section_start_; + + // buffer is the buffer for our CU, starting at .debug_info + offset + // passed in from constructor. + // after_header points to right after the compilation unit header. + const char* buffer_; + uint64 buffer_length_; + const char* after_header_; + + // The associated ByteReader that handles endianness issues for us + ByteReader* reader_; + + // The map of sections in our file to buffers containing their data + const SectionMap& sections_; + + // The associated handler to call processing functions in + Dwarf2Handler* handler_; + + // Set of DWARF2/3 abbreviations for this compilation unit. Indexed + // by abbreviation number, which means that abbrevs_[0] is not + // valid. + std::vector<Abbrev>* abbrevs_; + + // String section buffer and length, if we have a string section. + // This is here to avoid doing a section lookup for strings in + // ProcessAttribute, which is in the hot path for DWARF2 reading. + const char* string_buffer_; + uint64 string_buffer_length_; +}; + +// This class is the main interface between the reader and the +// client. The virtual functions inside this get called for +// interesting events that happen during DWARF2 reading. +// The default implementation skips everything. + +class Dwarf2Handler { + public: + Dwarf2Handler() { } + + virtual ~Dwarf2Handler() { } + + // Start to process a compilation unit at OFFSET from the beginning of the + // .debug_info section. Return false if you would like to skip this + // compilation unit. + virtual bool StartCompilationUnit(uint64 offset, uint8 address_size, + uint8 offset_size, uint64 cu_length, + uint8 dwarf_version) { return false; } + + // Start to process a DIE at OFFSET from the beginning of the .debug_info + // section. Return false if you would like to skip this DIE. + virtual bool StartDIE(uint64 offset, enum DwarfTag tag, + const AttributeList& attrs) { return false; } + + // Called when we have an attribute with unsigned data to give to our + // handler. The attribute is for the DIE at OFFSET from the beginning of the + // .debug_info section. Its name is ATTR, its form is FORM, and its value is + // DATA. + virtual void ProcessAttributeUnsigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { } + + // Called when we have an attribute with signed data to give to our handler. + // The attribute is for the DIE at OFFSET from the beginning of the + // .debug_info section. Its name is ATTR, its form is FORM, and its value is + // DATA. + virtual void ProcessAttributeSigned(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + int64 data) { } + + // Called when we have an attribute whose value is a reference to + // another DIE. The attribute belongs to the DIE at OFFSET from the + // beginning of the .debug_info section. Its name is ATTR, its form + // is FORM, and the offset of the DIE being referred to from the + // beginning of the .debug_info section is DATA. + virtual void ProcessAttributeReference(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { } + + // Called when we have an attribute with a buffer of data to give to our + // handler. The attribute is for the DIE at OFFSET from the beginning of the + // .debug_info section. Its name is ATTR, its form is FORM, DATA points to + // the buffer's contents, and its length in bytes is LENGTH. The buffer is + // owned by the caller, not the callee, and may not persist for very long. + // If you want the data to be available later, it needs to be copied. + virtual void ProcessAttributeBuffer(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const char* data, + uint64 len) { } + + // Called when we have an attribute with string data to give to our handler. + // The attribute is for the DIE at OFFSET from the beginning of the + // .debug_info section. Its name is ATTR, its form is FORM, and its value is + // DATA. + virtual void ProcessAttributeString(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + const string& data) { } + + // Called when we have an attribute whose value is the 64-bit signature + // of a type unit in the .debug_types section. OFFSET is the offset of + // the DIE whose attribute we're reporting. ATTR and FORM are the + // attribute's name and form. SIGNATURE is the type unit's signature. + virtual void ProcessAttributeSignature(uint64 offset, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 signature) { } + + // Called when finished processing the DIE at OFFSET. + // Because DWARF2/3 specifies a tree of DIEs, you may get starts + // before ends of the previous DIE, as we process children before + // ending the parent. + virtual void EndDIE(uint64 offset) { } + +}; + +// This class is a reader for DWARF's Call Frame Information. CFI +// describes how to unwind stack frames --- even for functions that do +// not follow fixed conventions for saving registers, whose frame size +// varies as they execute, etc. +// +// CFI describes, at each machine instruction, how to compute the +// stack frame's base address, how to find the return address, and +// where to find the saved values of the caller's registers (if the +// callee has stashed them somewhere to free up the registers for its +// own use). +// +// For example, suppose we have a function whose machine code looks +// like this (imagine an assembly language that looks like C, for a +// machine with 32-bit registers, and a stack that grows towards lower +// addresses): +// +// func: ; entry point; return address at sp +// func+0: sp = sp - 16 ; allocate space for stack frame +// func+1: sp[12] = r0 ; save r0 at sp+12 +// ... ; other code, not frame-related +// func+10: sp -= 4; *sp = x ; push some x on the stack +// ... ; other code, not frame-related +// func+20: r0 = sp[16] ; restore saved r0 +// func+21: sp += 20 ; pop whole stack frame +// func+22: pc = *sp; sp += 4 ; pop return address and jump to it +// +// DWARF CFI is (a very compressed representation of) a table with a +// row for each machine instruction address and a column for each +// register showing how to restore it, if possible. +// +// A special column named "CFA", for "Canonical Frame Address", tells how +// to compute the base address of the frame; registers' entries may +// refer to the CFA in describing where the registers are saved. +// +// Another special column, named "RA", represents the return address. +// +// For example, here is a complete (uncompressed) table describing the +// function above: +// +// insn cfa r0 r1 ... ra +// ======================================= +// func+0: sp cfa[0] +// func+1: sp+16 cfa[0] +// func+2: sp+16 cfa[-4] cfa[0] +// func+11: sp+20 cfa[-4] cfa[0] +// func+21: sp+20 cfa[0] +// func+22: sp cfa[0] +// +// Some things to note here: +// +// - Each row describes the state of affairs *before* executing the +// instruction at the given address. Thus, the row for func+0 +// describes the state before we allocate the stack frame. In the +// next row, the formula for computing the CFA has changed, +// reflecting that allocation. +// +// - The other entries are written in terms of the CFA; this allows +// them to remain unchanged as the stack pointer gets bumped around. +// For example, the rule for recovering the return address (the "ra" +// column) remains unchanged throughout the function, even as the +// stack pointer takes on three different offsets from the return +// address. +// +// - Although we haven't shown it, most calling conventions designate +// "callee-saves" and "caller-saves" registers. The callee must +// preserve the values of callee-saves registers; if it uses them, +// it must save their original values somewhere, and restore them +// before it returns. In contrast, the callee is free to trash +// caller-saves registers; if the callee uses these, it will +// probably not bother to save them anywhere, and the CFI will +// probably mark their values as "unrecoverable". +// +// (However, since the caller cannot assume the callee was going to +// save them, caller-saves registers are probably dead in the caller +// anyway, so compilers usually don't generate CFA for caller-saves +// registers.) +// +// - Exactly where the CFA points is a matter of convention that +// depends on the architecture and ABI in use. In the example, the +// CFA is the value the stack pointer had upon entry to the +// function, pointing at the saved return address. But on the x86, +// the call frame information generated by GCC follows the +// convention that the CFA is the address *after* the saved return +// address. +// +// But by definition, the CFA remains constant throughout the +// lifetime of the frame. This makes it a useful value for other +// columns to refer to. It is also gives debuggers a useful handle +// for identifying a frame. +// +// If you look at the table above, you'll notice that a given entry is +// often the same as the one immediately above it: most instructions +// change only one or two aspects of the stack frame, if they affect +// it at all. The DWARF format takes advantage of this fact, and +// reduces the size of the data by mentioning only the addresses and +// columns at which changes take place. So for the above, DWARF CFI +// data would only actually mention the following: +// +// insn cfa r0 r1 ... ra +// ======================================= +// func+0: sp cfa[0] +// func+1: sp+16 +// func+2: cfa[-4] +// func+11: sp+20 +// func+21: r0 +// func+22: sp +// +// In fact, this is the way the parser reports CFI to the consumer: as +// a series of statements of the form, "At address X, column Y changed +// to Z," and related conventions for describing the initial state. +// +// Naturally, it would be impractical to have to scan the entire +// program's CFI, noting changes as we go, just to recover the +// unwinding rules in effect at one particular instruction. To avoid +// this, CFI data is grouped into "entries", each of which covers a +// specified range of addresses and begins with a complete statement +// of the rules for all recoverable registers at that starting +// address. Each entry typically covers a single function. +// +// Thus, to compute the contents of a given row of the table --- that +// is, rules for recovering the CFA, RA, and registers at a given +// instruction --- the consumer should find the entry that covers that +// instruction's address, start with the initial state supplied at the +// beginning of the entry, and work forward until it has processed all +// the changes up to and including those for the present instruction. +// +// There are seven kinds of rules that can appear in an entry of the +// table: +// +// - "undefined": The given register is not preserved by the callee; +// its value cannot be recovered. +// +// - "same value": This register has the same value it did in the callee. +// +// - offset(N): The register is saved at offset N from the CFA. +// +// - val_offset(N): The value the register had in the caller is the +// CFA plus offset N. (This is usually only useful for describing +// the stack pointer.) +// +// - register(R): The register's value was saved in another register R. +// +// - expression(E): Evaluating the DWARF expression E using the +// current frame's registers' values yields the address at which the +// register was saved. +// +// - val_expression(E): Evaluating the DWARF expression E using the +// current frame's registers' values yields the value the register +// had in the caller. + +class CallFrameInfo { + public: + // The different kinds of entries one finds in CFI. Used internally, + // and for error reporting. + enum EntryKind { kUnknown, kCIE, kFDE, kTerminator }; + + // The handler class to which the parser hands the parsed call frame + // information. Defined below. + class Handler; + + // A reporter class, which CallFrameInfo uses to report errors + // encountered while parsing call frame information. Defined below. + class Reporter; + + // Create a DWARF CFI parser. BUFFER points to the contents of the + // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes. + // REPORTER is an error reporter the parser should use to report + // problems. READER is a ByteReader instance that has the endianness and + // address size set properly. Report the data we find to HANDLER. + // + // This class can also parse Linux C++ exception handling data, as found + // in '.eh_frame' sections. This data is a variant of DWARF CFI that is + // placed in loadable segments so that it is present in the program's + // address space, and is interpreted by the C++ runtime to search the + // call stack for a handler interested in the exception being thrown, + // actually pop the frames, and find cleanup code to run. + // + // There are two differences between the call frame information described + // in the DWARF standard and the exception handling data Linux places in + // the .eh_frame section: + // + // - Exception handling data uses uses a different format for call frame + // information entry headers. The distinguished CIE id, the way FDEs + // refer to their CIEs, and the way the end of the series of entries is + // determined are all slightly different. + // + // If the constructor's EH_FRAME argument is true, then the + // CallFrameInfo parses the entry headers as Linux C++ exception + // handling data. If EH_FRAME is false or omitted, the CallFrameInfo + // parses standard DWARF call frame information. + // + // - Linux C++ exception handling data uses CIE augmentation strings + // beginning with 'z' to specify the presence of additional data after + // the CIE and FDE headers and special encodings used for addresses in + // frame description entries. + // + // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or + // exception handling data if you have supplied READER with the base + // addresses needed to interpret the pointer encodings that 'z' + // augmentations can specify. See the ByteReader interface for details + // about the base addresses. See the CallFrameInfo::Handler interface + // for details about the additional information one might find in + // 'z'-augmented data. + // + // Thus: + // + // - If you are parsing standard DWARF CFI, as found in a .debug_frame + // section, you should pass false for the EH_FRAME argument, or omit + // it, and you need not worry about providing READER with the + // additional base addresses. + // + // - If you want to parse Linux C++ exception handling data from a + // .eh_frame section, you should pass EH_FRAME as true, and call + // READER's Set*Base member functions before calling our Start method. + // + // - If you want to parse DWARF CFI that uses the 'z' augmentations + // (although I don't think any toolchain ever emits such data), you + // could pass false for EH_FRAME, but call READER's Set*Base members. + // + // The extensions the Linux C++ ABI makes to DWARF for exception + // handling are described here, rather poorly: + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html + // + // The mechanics of C++ exception handling, personality routines, + // and language-specific data areas are described here, rather nicely: + // http://www.codesourcery.com/public/cxx-abi/abi-eh.html + CallFrameInfo(const char *buffer, size_t buffer_length, + ByteReader *reader, Handler *handler, Reporter *reporter, + bool eh_frame = false) + : buffer_(buffer), buffer_length_(buffer_length), + reader_(reader), handler_(handler), reporter_(reporter), + eh_frame_(eh_frame) { } + + ~CallFrameInfo() { } + + // Parse the entries in BUFFER, reporting what we find to HANDLER. + // Return true if we reach the end of the section successfully, or + // false if we encounter an error. + bool Start(); + + // Return the textual name of KIND. For error reporting. + static const char *KindName(EntryKind kind); + + private: + + struct CIE; + + // A CFI entry, either an FDE or a CIE. + struct Entry { + // The starting offset of the entry in the section, for error + // reporting. + size_t offset; + + // The start of this entry in the buffer. + const char *start; + + // Which kind of entry this is. + // + // We want to be able to use this for error reporting even while we're + // in the midst of parsing. Error reporting code may assume that kind, + // offset, and start fields are valid, although kind may be kUnknown. + EntryKind kind; + + // The end of this entry's common prologue (initial length and id), and + // the start of this entry's kind-specific fields. + const char *fields; + + // The start of this entry's instructions. + const char *instructions; + + // The address past the entry's last byte in the buffer. (Note that + // since offset points to the entry's initial length field, and the + // length field is the number of bytes after that field, this is not + // simply buffer_ + offset + length.) + const char *end; + + // For both DWARF CFI and .eh_frame sections, this is the CIE id in a + // CIE, and the offset of the associated CIE in an FDE. + uint64 id; + + // The CIE that applies to this entry, if we've parsed it. If this is a + // CIE, then this field points to this structure. + CIE *cie; + }; + + // A common information entry (CIE). + struct CIE: public Entry { + uint8 version; // CFI data version number + string augmentation; // vendor format extension markers + uint64 code_alignment_factor; // scale for code address adjustments + int data_alignment_factor; // scale for stack pointer adjustments + unsigned return_address_register; // which register holds the return addr + + // True if this CIE includes Linux C++ ABI 'z' augmentation data. + bool has_z_augmentation; + + // Parsed 'z' augmentation data. These are meaningful only if + // has_z_augmentation is true. + bool has_z_lsda; // The 'z' augmentation included 'L'. + bool has_z_personality; // The 'z' augmentation included 'P'. + bool has_z_signal_frame; // The 'z' augmentation included 'S'. + + // If has_z_lsda is true, this is the encoding to be used for language- + // specific data area pointers in FDEs. + DwarfPointerEncoding lsda_encoding; + + // If has_z_personality is true, this is the encoding used for the + // personality routine pointer in the augmentation data. + DwarfPointerEncoding personality_encoding; + + // If has_z_personality is true, this is the address of the personality + // routine --- or, if personality_encoding & DW_EH_PE_indirect, the + // address where the personality routine's address is stored. + uint64 personality_address; + + // This is the encoding used for addresses in the FDE header and + // in DW_CFA_set_loc instructions. This is always valid, whether + // or not we saw a 'z' augmentation string; its default value is + // DW_EH_PE_absptr, which is what normal DWARF CFI uses. + DwarfPointerEncoding pointer_encoding; + }; + + // A frame description entry (FDE). + struct FDE: public Entry { + uint64 address; // start address of described code + uint64 size; // size of described code, in bytes + + // If cie->has_z_lsda is true, then this is the language-specific data + // area's address --- or its address's address, if cie->lsda_encoding + // has the DW_EH_PE_indirect bit set. + uint64 lsda_address; + }; + + // Internal use. + class Rule; + class UndefinedRule; + class SameValueRule; + class OffsetRule; + class ValOffsetRule; + class RegisterRule; + class ExpressionRule; + class ValExpressionRule; + class RuleMap; + class State; + + // Parse the initial length and id of a CFI entry, either a CIE, an FDE, + // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the + // data to parse. On success, populate ENTRY as appropriate, and return + // true. On failure, report the problem, and return false. Even if we + // return false, set ENTRY->end to the first byte after the entry if we + // were able to figure that out, or NULL if we weren't. + bool ReadEntryPrologue(const char *cursor, Entry *entry); + + // Parse the fields of a CIE after the entry prologue, including any 'z' + // augmentation data. Assume that the 'Entry' fields of CIE are + // populated; use CIE->fields and CIE->end as the start and limit for + // parsing. On success, populate the rest of *CIE, and return true; on + // failure, report the problem and return false. + bool ReadCIEFields(CIE *cie); + + // Parse the fields of an FDE after the entry prologue, including any 'z' + // augmentation data. Assume that the 'Entry' fields of *FDE are + // initialized; use FDE->fields and FDE->end as the start and limit for + // parsing. Assume that FDE->cie is fully initialized. On success, + // populate the rest of *FDE, and return true; on failure, report the + // problem and return false. + bool ReadFDEFields(FDE *fde); + + // Report that ENTRY is incomplete, and return false. This is just a + // trivial wrapper for invoking reporter_->Incomplete; it provides a + // little brevity. + bool ReportIncomplete(Entry *entry); + + // Return true if ENCODING has the DW_EH_PE_indirect bit set. + static bool IsIndirectEncoding(DwarfPointerEncoding encoding) { + return encoding & DW_EH_PE_indirect; + } + + // The contents of the DWARF .debug_info section we're parsing. + const char *buffer_; + size_t buffer_length_; + + // For reading multi-byte values with the appropriate endianness. + ByteReader *reader_; + + // The handler to which we should report the data we find. + Handler *handler_; + + // For reporting problems in the info we're parsing. + Reporter *reporter_; + + // True if we are processing .eh_frame-format data. + bool eh_frame_; +}; + +// The handler class for CallFrameInfo. The a CFI parser calls the +// member functions of a handler object to report the data it finds. +class CallFrameInfo::Handler { + public: + // The pseudo-register number for the canonical frame address. + enum { kCFARegister = -1 }; + + Handler() { } + virtual ~Handler() { } + + // The parser has found CFI for the machine code at ADDRESS, + // extending for LENGTH bytes. OFFSET is the offset of the frame + // description entry in the section, for use in error messages. + // VERSION is the version number of the CFI format. AUGMENTATION is + // a string describing any producer-specific extensions present in + // the data. RETURN_ADDRESS is the number of the register that holds + // the address to which the function should return. + // + // Entry should return true to process this CFI, or false to skip to + // the next entry. + // + // The parser invokes Entry for each Frame Description Entry (FDE) + // it finds. The parser doesn't report Common Information Entries + // to the handler explicitly; instead, if the handler elects to + // process a given FDE, the parser reiterates the appropriate CIE's + // contents at the beginning of the FDE's rules. + virtual bool Entry(size_t offset, uint64 address, uint64 length, + uint8 version, const string &augmentation, + unsigned return_address) = 0; + + // When the Entry function returns true, the parser calls these + // handler functions repeatedly to describe the rules for recovering + // registers at each instruction in the given range of machine code. + // Immediately after a call to Entry, the handler should assume that + // the rule for each callee-saves register is "unchanged" --- that + // is, that the register still has the value it had in the caller. + // + // If a *Rule function returns true, we continue processing this entry's + // instructions. If a *Rule function returns false, we stop evaluating + // instructions, and skip to the next entry. Either way, we call End + // before going on to the next entry. + // + // In all of these functions, if the REG parameter is kCFARegister, then + // the rule describes how to find the canonical frame address. + // kCFARegister may be passed as a BASE_REGISTER argument, meaning that + // the canonical frame address should be used as the base address for the + // computation. All other REG values will be positive. + + // At ADDRESS, register REG's value is not recoverable. + virtual bool UndefinedRule(uint64 address, int reg) = 0; + + // At ADDRESS, register REG's value is the same as that it had in + // the caller. + virtual bool SameValueRule(uint64 address, int reg) = 0; + + // At ADDRESS, register REG has been saved at offset OFFSET from + // BASE_REGISTER. + virtual bool OffsetRule(uint64 address, int reg, + int base_register, long offset) = 0; + + // At ADDRESS, the caller's value of register REG is the current + // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an + // address at which the register's value is saved.) + virtual bool ValOffsetRule(uint64 address, int reg, + int base_register, long offset) = 0; + + // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs + // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that + // BASE_REGISTER is the "home" for REG's saved value: if you want to + // assign to a variable whose home is REG in the calling frame, you + // should put the value in BASE_REGISTER. + virtual bool RegisterRule(uint64 address, int reg, int base_register) = 0; + + // At ADDRESS, the DWARF expression EXPRESSION yields the address at + // which REG was saved. + virtual bool ExpressionRule(uint64 address, int reg, + const string &expression) = 0; + + // At ADDRESS, the DWARF expression EXPRESSION yields the caller's + // value for REG. (This rule doesn't provide an address at which the + // register's value is saved.) + virtual bool ValExpressionRule(uint64 address, int reg, + const string &expression) = 0; + + // Indicate that the rules for the address range reported by the + // last call to Entry are complete. End should return true if + // everything is okay, or false if an error has occurred and parsing + // should stop. + virtual bool End() = 0; + + // Handler functions for Linux C++ exception handling data. These are + // only called if the data includes 'z' augmentation strings. + + // The Linux C++ ABI uses an extension of the DWARF CFI format to + // walk the stack to propagate exceptions from the throw to the + // appropriate catch, and do the appropriate cleanups along the way. + // CFI entries used for exception handling have two additional data + // associated with them: + // + // - The "language-specific data area" describes which exception + // types the function has 'catch' clauses for, and indicates how + // to go about re-entering the function at the appropriate catch + // clause. If the exception is not caught, it describes the + // destructors that must run before the frame is popped. + // + // - The "personality routine" is responsible for interpreting the + // language-specific data area's contents, and deciding whether + // the exception should continue to propagate down the stack, + // perhaps after doing some cleanup for this frame, or whether the + // exception will be caught here. + // + // In principle, the language-specific data area is opaque to + // everybody but the personality routine. In practice, these values + // may be useful or interesting to readers with extra context, and + // we have to at least skip them anyway, so we might as well report + // them to the handler. + + // This entry's exception handling personality routine's address is + // ADDRESS. If INDIRECT is true, then ADDRESS is the address at + // which the routine's address is stored. The default definition for + // this handler function simply returns true, allowing parsing of + // the entry to continue. + virtual bool PersonalityRoutine(uint64 address, bool indirect) { + return true; + } + + // This entry's language-specific data area (LSDA) is located at + // ADDRESS. If INDIRECT is true, then ADDRESS is the address at + // which the area's address is stored. The default definition for + // this handler function simply returns true, allowing parsing of + // the entry to continue. + virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) { + return true; + } + + // This entry describes a signal trampoline --- this frame is the + // caller of a signal handler. The default definition for this + // handler function simply returns true, allowing parsing of the + // entry to continue. + // + // The best description of the rationale for and meaning of signal + // trampoline CFI entries seems to be in the GCC bug database: + // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208 + virtual bool SignalHandler() { return true; } +}; + +// The CallFrameInfo class makes calls on an instance of this class to +// report errors or warn about problems in the data it is parsing. The +// default definitions of these methods print a message to stderr, but +// you can make a derived class that overrides them. +class CallFrameInfo::Reporter { + public: + // Create an error reporter which attributes troubles to the section + // named SECTION in FILENAME. + // + // Normally SECTION would be .debug_frame, but the Mac puts CFI data + // in a Mach-O section named __debug_frame. If we support + // Linux-style exception handling data, we could be reading an + // .eh_frame section. + Reporter(const string &filename, + const string §ion = ".debug_frame") + : filename_(filename), section_(section) { } + virtual ~Reporter() { } + + // The CFI entry at OFFSET ends too early to be well-formed. KIND + // indicates what kind of entry it is; KIND can be kUnknown if we + // haven't parsed enough of the entry to tell yet. + virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind); + + // The .eh_frame data has a four-byte zero at OFFSET where the next + // entry's length would be; this is a terminator. However, the buffer + // length as given to the CallFrameInfo constructor says there should be + // more data. + virtual void EarlyEHTerminator(uint64 offset); + + // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the + // section is not that large. + virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset); + + // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry + // there is not a CIE. + virtual void BadCIEId(uint64 offset, uint64 cie_offset); + + // The FDE at OFFSET refers to a CIE with version number VERSION, + // which we don't recognize. We cannot parse DWARF CFI if it uses + // a version number we don't recognize. + virtual void UnrecognizedVersion(uint64 offset, int version); + + // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION, + // which we don't recognize. We cannot parse DWARF CFI if it uses + // augmentations we don't recognize. + virtual void UnrecognizedAugmentation(uint64 offset, + const string &augmentation); + + // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not + // a valid encoding. + virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding); + + // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends + // on a base address which has not been supplied. + virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding); + + // The CIE at OFFSET contains a DW_CFA_restore instruction at + // INSN_OFFSET, which may not appear in a CIE. + virtual void RestoreInCIE(uint64 offset, uint64 insn_offset); + + // The entry at OFFSET, of kind KIND, has an unrecognized + // instruction at INSN_OFFSET. + virtual void BadInstruction(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + // The instruction at INSN_OFFSET in the entry at OFFSET, of kind + // KIND, establishes a rule that cites the CFA, but we have not + // established a CFA rule yet. + virtual void NoCFARule(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + // The instruction at INSN_OFFSET in the entry at OFFSET, of kind + // KIND, is a DW_CFA_restore_state instruction, but the stack of + // saved states is empty. + virtual void EmptyStateStack(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + // The DW_CFA_remember_state instruction at INSN_OFFSET in the entry + // at OFFSET, of kind KIND, would restore a state that has no CFA + // rule, whereas the current state does have a CFA rule. This is + // bogus input, which the CallFrameInfo::Handler interface doesn't + // (and shouldn't) have any way to report. + virtual void ClearingCFARule(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + protected: + // The name of the file whose CFI we're reading. + string filename_; + + // The name of the CFI section in that file. + string section_; +}; + +} // namespace dwarf2reader + +#endif // UTIL_DEBUGINFO_DWARF2READER_H__ diff --git a/3rdParty/Breakpad/src/common/dwarf/line_state_machine.h b/3rdParty/Breakpad/src/common/dwarf/line_state_machine.h new file mode 100644 index 0000000..0ff72ab --- /dev/null +++ b/3rdParty/Breakpad/src/common/dwarf/line_state_machine.h @@ -0,0 +1,61 @@ +// Copyright 2008 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#ifndef COMMON_DWARF_LINE_STATE_MACHINE_H__ +#define COMMON_DWARF_LINE_STATE_MACHINE_H__ + +namespace dwarf2reader { + +// This is the format of a DWARF2/3 line state machine that we process +// opcodes using. There is no need for anything outside the lineinfo +// processor to know how this works. +struct LineStateMachine { + void Reset(bool default_is_stmt) { + file_num = 1; + address = 0; + line_num = 1; + column_num = 0; + is_stmt = default_is_stmt; + basic_block = false; + end_sequence = false; + } + + uint32 file_num; + uint64 address; + uint32 line_num; + uint32 column_num; + bool is_stmt; // stmt means statement. + bool basic_block; + bool end_sequence; +}; + +} // namespace dwarf2reader + + +#endif // COMMON_DWARF_LINE_STATE_MACHINE_H__ diff --git a/3rdParty/Breakpad/src/common/dwarf/types.h b/3rdParty/Breakpad/src/common/dwarf/types.h new file mode 100644 index 0000000..61ca457 --- /dev/null +++ b/3rdParty/Breakpad/src/common/dwarf/types.h @@ -0,0 +1,55 @@ +// Copyright 2008 Google, Inc. All Rights reserved +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +// This file contains some typedefs for basic types + + +#ifndef _COMMON_DWARF_TYPES_H__ +#define _COMMON_DWARF_TYPES_H__ + +#include <stdint.h> + +typedef signed char int8; +typedef short int16; +typedef int int32; +typedef long long int64; + +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef unsigned int uint32; +typedef unsigned long long uint64; + +#ifdef __PTRDIFF_TYPE__ +typedef __PTRDIFF_TYPE__ intptr; +typedef unsigned __PTRDIFF_TYPE__ uintptr; +#else +#error "Can't find pointer-sized integral types." +#endif + +#endif // _COMMON_DWARF_TYPES_H__ diff --git a/3rdParty/Breakpad/src/common/dwarf_cfi_to_module.cc b/3rdParty/Breakpad/src/common/dwarf_cfi_to_module.cc new file mode 100644 index 0000000..15904d7 --- /dev/null +++ b/3rdParty/Breakpad/src/common/dwarf_cfi_to_module.cc @@ -0,0 +1,258 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2010, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// Implementation of google_breakpad::DwarfCFIToModule. +// See dwarf_cfi_to_module.h for details. + +#include <sstream> + +#include "common/dwarf_cfi_to_module.h" + +namespace google_breakpad { + +using std::ostringstream; + +vector<string> DwarfCFIToModule::RegisterNames::MakeVector( + const char * const *strings, + size_t size) { + vector<string> names(strings, strings + size); + return names; +} + +vector<string> DwarfCFIToModule::RegisterNames::I386() { + static const char *const names[] = { + "$eax", "$ecx", "$edx", "$ebx", "$esp", "$ebp", "$esi", "$edi", + "$eip", "$eflags", "$unused1", + "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7", + "$unused2", "$unused3", + "$xmm0", "$xmm1", "$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7", + "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7", + "$fcw", "$fsw", "$mxcsr", + "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused4", "$unused5", + "$tr", "$ldtr" + }; + + return MakeVector(names, sizeof(names) / sizeof(names[0])); +} + +vector<string> DwarfCFIToModule::RegisterNames::X86_64() { + static const char *const names[] = { + "$rax", "$rdx", "$rcx", "$rbx", "$rsi", "$rdi", "$rbp", "$rsp", + "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", + "$rip", + "$xmm0","$xmm1","$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7", + "$xmm8","$xmm9","$xmm10","$xmm11","$xmm12","$xmm13","$xmm14","$xmm15", + "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7", + "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7", + "$rflags", + "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused1", "$unused2", + "$fs.base", "$gs.base", "$unused3", "$unused4", + "$tr", "$ldtr", + "$mxcsr", "$fcw", "$fsw" + }; + + return MakeVector(names, sizeof(names) / sizeof(names[0])); +} + +// Per ARM IHI 0040A, section 3.1 +vector<string> DwarfCFIToModule::RegisterNames::ARM() { + static const char *const names[] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "sp", "lr", "pc", + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", + "fps", "cpsr", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", + "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", + "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7" + }; + + return MakeVector(names, sizeof(names) / sizeof(names[0])); +} + +bool DwarfCFIToModule::Entry(size_t offset, uint64 address, uint64 length, + uint8 version, const string &augmentation, + unsigned return_address) { + assert(!entry_); + + // If dwarf2reader::CallFrameInfo can handle this version and + // augmentation, then we should be okay with that, so there's no + // need to check them here. + + // Get ready to collect entries. + entry_ = new Module::StackFrameEntry; + entry_->address = address; + entry_->size = length; + entry_offset_ = offset; + return_address_ = return_address; + + // Breakpad STACK CFI records must provide a .ra rule, but DWARF CFI + // may not establish any rule for .ra if the return address column + // is an ordinary register, and that register holds the return + // address on entry to the function. So establish an initial .ra + // rule citing the return address register. + if (return_address_ < register_names_.size()) + entry_->initial_rules[ra_name_] = register_names_[return_address_]; + + return true; +} + +string DwarfCFIToModule::RegisterName(int i) { + assert(entry_); + if (i < 0) { + assert(i == kCFARegister); + return cfa_name_; + } + unsigned reg = i; + if (reg == return_address_) + return ra_name_; + + // Ensure that a non-empty name exists for this register value. + if (reg < register_names_.size() && !register_names_[reg].empty()) + return register_names_[reg]; + + reporter_->UnnamedRegister(entry_offset_, reg); + char buf[30]; + sprintf(buf, "unnamed_register%u", reg); + return buf; +} + +void DwarfCFIToModule::Record(Module::Address address, int reg, + const string &rule) { + assert(entry_); + + // Place the name in our global set of strings, and then use the string + // from the set. Even though the assignment looks like a copy, all the + // major std::string implementations use reference counting internally, + // so the effect is to have all our data structures share copies of rules + // whenever possible. Since register names are drawn from a + // vector<string>, register names are already shared. + string shared_rule = *common_strings_.insert(rule).first; + + // Is this one of this entry's initial rules? + if (address == entry_->address) + entry_->initial_rules[RegisterName(reg)] = shared_rule; + // File it under the appropriate address. + else + entry_->rule_changes[address][RegisterName(reg)] = shared_rule; +} + +bool DwarfCFIToModule::UndefinedRule(uint64 address, int reg) { + reporter_->UndefinedNotSupported(entry_offset_, RegisterName(reg)); + // Treat this as a non-fatal error. + return true; +} + +bool DwarfCFIToModule::SameValueRule(uint64 address, int reg) { + ostringstream s; + s << RegisterName(reg); + Record(address, reg, s.str()); + return true; +} + +bool DwarfCFIToModule::OffsetRule(uint64 address, int reg, + int base_register, long offset) { + ostringstream s; + s << RegisterName(base_register) << " " << offset << " + ^"; + Record(address, reg, s.str()); + return true; +} + +bool DwarfCFIToModule::ValOffsetRule(uint64 address, int reg, + int base_register, long offset) { + ostringstream s; + s << RegisterName(base_register) << " " << offset << " +"; + Record(address, reg, s.str()); + return true; +} + +bool DwarfCFIToModule::RegisterRule(uint64 address, int reg, + int base_register) { + ostringstream s; + s << RegisterName(base_register); + Record(address, reg, s.str()); + return true; +} + +bool DwarfCFIToModule::ExpressionRule(uint64 address, int reg, + const string &expression) { + reporter_->ExpressionsNotSupported(entry_offset_, RegisterName(reg)); + // Treat this as a non-fatal error. + return true; +} + +bool DwarfCFIToModule::ValExpressionRule(uint64 address, int reg, + const string &expression) { + reporter_->ExpressionsNotSupported(entry_offset_, RegisterName(reg)); + // Treat this as a non-fatal error. + return true; +} + +bool DwarfCFIToModule::End() { + module_->AddStackFrameEntry(entry_); + entry_ = NULL; + return true; +} + +void DwarfCFIToModule::Reporter::UnnamedRegister(size_t offset, int reg) { + fprintf(stderr, "%s, section '%s': " + "the call frame entry at offset 0x%zx refers to register %d," + " whose name we don't know\n", + file_.c_str(), section_.c_str(), offset, reg); +} + +void DwarfCFIToModule::Reporter::UndefinedNotSupported(size_t offset, + const string ®) { + fprintf(stderr, "%s, section '%s': " + "the call frame entry at offset 0x%zx sets the rule for " + "register '%s' to 'undefined', but the Breakpad symbol file format" + " cannot express this\n", + file_.c_str(), section_.c_str(), offset, reg.c_str()); +} + +void DwarfCFIToModule::Reporter::ExpressionsNotSupported(size_t offset, + const string ®) { + fprintf(stderr, "%s, section '%s': " + "the call frame entry at offset 0x%zx uses a DWARF expression to" + " describe how to recover register '%s', " + " but this translator cannot yet translate DWARF expressions to" + " Breakpad postfix expressions\n", + file_.c_str(), section_.c_str(), offset, reg.c_str()); +} + +} // namespace google_breakpad diff --git a/3rdParty/Breakpad/src/common/dwarf_cfi_to_module.h b/3rdParty/Breakpad/src/common/dwarf_cfi_to_module.h new file mode 100644 index 0000000..7db552a --- /dev/null +++ b/3rdParty/Breakpad/src/common/dwarf_cfi_to_module.h @@ -0,0 +1,196 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2010, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// dwarf_cfi_to_module.h: Define the DwarfCFIToModule class, which +// accepts parsed DWARF call frame info and adds it to a +// google_breakpad::Module object, which can write that information to +// a Breakpad symbol file. + +#ifndef COMMON_LINUX_DWARF_CFI_TO_MODULE_H +#define COMMON_LINUX_DWARF_CFI_TO_MODULE_H + +#include <assert.h> +#include <stdio.h> + +#include <set> +#include <string> +#include <vector> + +#include "common/module.h" +#include "common/dwarf/dwarf2reader.h" +#include "common/using_std_string.h" + +namespace google_breakpad { + +using dwarf2reader::CallFrameInfo; +using google_breakpad::Module; +using std::set; +using std::vector; + +// A class that accepts parsed call frame information from the DWARF +// CFI parser and populates a google_breakpad::Module object with the +// contents. +class DwarfCFIToModule: public CallFrameInfo::Handler { + public: + + // DwarfCFIToModule uses an instance of this class to report errors + // detected while converting DWARF CFI to Breakpad STACK CFI records. + class Reporter { + public: + // Create a reporter that writes messages to the standard error + // stream. FILE is the name of the file we're processing, and + // SECTION is the name of the section within that file that we're + // looking at (.debug_frame, .eh_frame, etc.). + Reporter(const string &file, const string §ion) + : file_(file), section_(section) { } + virtual ~Reporter() { } + + // The DWARF CFI entry at OFFSET cites register REG, but REG is not + // covered by the vector of register names passed to the + // DwarfCFIToModule constructor, nor does it match the return + // address column number for this entry. + virtual void UnnamedRegister(size_t offset, int reg); + + // The DWARF CFI entry at OFFSET says that REG is undefined, but the + // Breakpad symbol file format cannot express this. + virtual void UndefinedNotSupported(size_t offset, const string ®); + + // The DWARF CFI entry at OFFSET says that REG uses a DWARF + // expression to find its value, but DwarfCFIToModule is not + // capable of translating DWARF expressions to Breakpad postfix + // expressions. + virtual void ExpressionsNotSupported(size_t offset, const string ®); + + protected: + string file_, section_; + }; + + // Register name tables. If TABLE is a vector returned by one of these + // functions, then TABLE[R] is the name of the register numbered R in + // DWARF call frame information. + class RegisterNames { + public: + // Intel's "x86" or IA-32. + static vector<string> I386(); + + // AMD x86_64, AMD64, Intel EM64T, or Intel 64 + static vector<string> X86_64(); + + // ARM. + static vector<string> ARM(); + + private: + // Given STRINGS, an array of C strings with SIZE elements, return an + // equivalent vector<string>. + static vector<string> MakeVector(const char * const *strings, size_t size); + }; + + // Create a handler for the dwarf2reader::CallFrameInfo parser that + // records the stack unwinding information it receives in MODULE. + // + // Use REGISTER_NAMES[I] as the name of register number I; *this + // keeps a reference to the vector, so the vector should remain + // alive for as long as the DwarfCFIToModule does. + // + // Use REPORTER for reporting problems encountered in the conversion + // process. + DwarfCFIToModule(Module *module, const vector<string> ®ister_names, + Reporter *reporter) + : module_(module), register_names_(register_names), reporter_(reporter), + entry_(NULL), return_address_(-1), cfa_name_(".cfa"), ra_name_(".ra") { + } + virtual ~DwarfCFIToModule() { delete entry_; } + + virtual bool Entry(size_t offset, uint64 address, uint64 length, + uint8 version, const string &augmentation, + unsigned return_address); + virtual bool UndefinedRule(uint64 address, int reg); + virtual bool SameValueRule(uint64 address, int reg); + virtual bool OffsetRule(uint64 address, int reg, + int base_register, long offset); + virtual bool ValOffsetRule(uint64 address, int reg, + int base_register, long offset); + virtual bool RegisterRule(uint64 address, int reg, int base_register); + virtual bool ExpressionRule(uint64 address, int reg, + const string &expression); + virtual bool ValExpressionRule(uint64 address, int reg, + const string &expression); + virtual bool End(); + + private: + // Return the name to use for register REG. + string RegisterName(int i); + + // Record RULE for register REG at ADDRESS. + void Record(Module::Address address, int reg, const string &rule); + + // The module to which we should add entries. + Module *module_; + + // Map from register numbers to register names. + const vector<string> ®ister_names_; + + // The reporter to use to report problems. + Reporter *reporter_; + + // The current entry we're constructing. + Module::StackFrameEntry *entry_; + + // The section offset of the current frame description entry, for + // use in error messages. + size_t entry_offset_; + + // The return address column for that entry. + unsigned return_address_; + + // The names of the return address and canonical frame address. Putting + // these here instead of using string literals allows us to share their + // texts in reference-counted std::string implementations (all the + // popular ones). Many, many rules cite these strings. + string cfa_name_, ra_name_; + + // A set of strings used by this CFI. Before storing a string in one of + // our data structures, insert it into this set, and then use the string + // from the set. + // + // Because std::string uses reference counting internally, simply using + // strings from this set, even if passed by value, assigned, or held + // directly in structures and containers (map<string, ...>, for example), + // causes those strings to share a single instance of each distinct piece + // of text. + set<string> common_strings_; +}; + +} // namespace google_breakpad + +#endif // COMMON_LINUX_DWARF_CFI_TO_MODULE_H diff --git a/3rdParty/Breakpad/src/common/dwarf_cu_to_module.cc b/3rdParty/Breakpad/src/common/dwarf_cu_to_module.cc new file mode 100644 index 0000000..ded5f83 --- /dev/null +++ b/3rdParty/Breakpad/src/common/dwarf_cu_to_module.cc @@ -0,0 +1,936 @@ +// Copyright (c) 2010 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// Implement the DwarfCUToModule class; see dwarf_cu_to_module.h. + +// For <inttypes.h> PRI* macros, before anything else might #include it. +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif /* __STDC_FORMAT_MACROS */ + +#include "common/dwarf_cu_to_module.h" + +#include <assert.h> +#include <inttypes.h> +#include <stdio.h> + +#include <algorithm> +#include <set> +#include <utility> + +#include "common/dwarf_line_to_module.h" + +namespace google_breakpad { + +using std::map; +using std::pair; +using std::set; +using std::vector; + +// Data provided by a DWARF specification DIE. +// +// In DWARF, the DIE for a definition may contain a DW_AT_specification +// attribute giving the offset of the corresponding declaration DIE, and +// the definition DIE may omit information given in the declaration. For +// example, it's common for a function's address range to appear only in +// its definition DIE, but its name to appear only in its declaration +// DIE. +// +// The dumper needs to be able to follow DW_AT_specification links to +// bring all this information together in a FUNC record. Conveniently, +// DIEs that are the target of such links have a DW_AT_declaration flag +// set, so we can identify them when we first see them, and record their +// contents for later reference. +// +// A Specification holds information gathered from a declaration DIE that +// we may need if we find a DW_AT_specification link pointing to it. +struct DwarfCUToModule::Specification { + // The name of the enclosing scope, or the empty string if there is none. + string enclosing_name; + + // The name for the specification DIE itself, without any enclosing + // name components. + string unqualified_name; +}; + +// An abstract origin -- base definition of an inline function. +struct AbstractOrigin { + AbstractOrigin() : name() {} + AbstractOrigin(const string& name) : name(name) {} + + string name; +}; + +typedef map<uint64, AbstractOrigin> AbstractOriginByOffset; + +// Data global to the DWARF-bearing file that is private to the +// DWARF-to-Module process. +struct DwarfCUToModule::FilePrivate { + // A set of strings used in this CU. Before storing a string in one of + // our data structures, insert it into this set, and then use the string + // from the set. + // + // Because std::string uses reference counting internally, simply using + // strings from this set, even if passed by value, assigned, or held + // directly in structures and containers (map<string, ...>, for example), + // causes those strings to share a single instance of each distinct piece + // of text. + set<string> common_strings; + + // A map from offsets of DIEs within the .debug_info section to + // Specifications describing those DIEs. Specification references can + // cross compilation unit boundaries. + SpecificationByOffset specifications; + + AbstractOriginByOffset origins; +}; + +DwarfCUToModule::FileContext::FileContext(const string &filename_arg, + Module *module_arg) + : filename(filename_arg), module(module_arg) { + file_private = new FilePrivate(); +} + +DwarfCUToModule::FileContext::~FileContext() { + delete file_private; +} + +// Information global to the particular compilation unit we're +// parsing. This is for data shared across the CU's entire DIE tree, +// and parameters from the code invoking the CU parser. +struct DwarfCUToModule::CUContext { + CUContext(FileContext *file_context_arg, WarningReporter *reporter_arg) + : file_context(file_context_arg), + reporter(reporter_arg), + language(Language::CPlusPlus) { } + ~CUContext() { + for (vector<Module::Function *>::iterator it = functions.begin(); + it != functions.end(); it++) + delete *it; + }; + + // The DWARF-bearing file into which this CU was incorporated. + FileContext *file_context; + + // For printing error messages. + WarningReporter *reporter; + + // The source language of this compilation unit. + const Language *language; + + // The functions defined in this compilation unit. We accumulate + // them here during parsing. Then, in DwarfCUToModule::Finish, we + // assign them lines and add them to file_context->module. + // + // Destroying this destroys all the functions this vector points to. + vector<Module::Function *> functions; +}; + +// Information about the context of a particular DIE. This is for +// information that changes as we descend the tree towards the leaves: +// the containing classes/namespaces, etc. +struct DwarfCUToModule::DIEContext { + // The fully-qualified name of the context. For example, for a + // tree like: + // + // DW_TAG_namespace Foo + // DW_TAG_class Bar + // DW_TAG_subprogram Baz + // + // in a C++ compilation unit, the DIEContext's name for the + // DW_TAG_subprogram DIE would be "Foo::Bar". The DIEContext's + // name for the DW_TAG_namespace DIE would be "". + string name; +}; + +// An abstract base class for all the dumper's DIE handlers. +class DwarfCUToModule::GenericDIEHandler: public dwarf2reader::DIEHandler { + public: + // Create a handler for the DIE at OFFSET whose compilation unit is + // described by CU_CONTEXT, and whose immediate context is described + // by PARENT_CONTEXT. + GenericDIEHandler(CUContext *cu_context, DIEContext *parent_context, + uint64 offset) + : cu_context_(cu_context), + parent_context_(parent_context), + offset_(offset), + declaration_(false), + specification_(NULL) { } + + // Derived classes' ProcessAttributeUnsigned can defer to this to + // handle DW_AT_declaration, or simply not override it. + void ProcessAttributeUnsigned(enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data); + + // Derived classes' ProcessAttributeReference can defer to this to + // handle DW_AT_specification, or simply not override it. + void ProcessAttributeReference(enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data); + + // Derived classes' ProcessAttributeReference can defer to this to + // handle DW_AT_specification, or simply not override it. + void ProcessAttributeString(enum DwarfAttribute attr, + enum DwarfForm form, + const string &data); + + protected: + // Compute and return the fully-qualified name of the DIE. If this + // DIE is a declaration DIE, to be cited by other DIEs' + // DW_AT_specification attributes, record its enclosing name and + // unqualified name in the specification table. + // + // Use this from EndAttributes member functions, not ProcessAttribute* + // functions; only the former can be sure that all the DIE's attributes + // have been seen. + string ComputeQualifiedName(); + + CUContext *cu_context_; + DIEContext *parent_context_; + uint64 offset_; + + // If this DIE has a DW_AT_declaration attribute, this is its value. + // It is false on DIEs with no DW_AT_declaration attribute. + bool declaration_; + + // If this DIE has a DW_AT_specification attribute, this is the + // Specification structure for the DIE the attribute refers to. + // Otherwise, this is NULL. + Specification *specification_; + + // The value of the DW_AT_name attribute, or the empty string if the + // DIE has no such attribute. + string name_attribute_; +}; + +void DwarfCUToModule::GenericDIEHandler::ProcessAttributeUnsigned( + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { + switch (attr) { + case dwarf2reader::DW_AT_declaration: declaration_ = (data != 0); break; + default: break; + } +} + +void DwarfCUToModule::GenericDIEHandler::ProcessAttributeReference( + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { + switch (attr) { + case dwarf2reader::DW_AT_specification: { + // Find the Specification to which this attribute refers, and + // set specification_ appropriately. We could do more processing + // here, but it's better to leave the real work to our + // EndAttribute member function, at which point we know we have + // seen all the DIE's attributes. + FileContext *file_context = cu_context_->file_context; + SpecificationByOffset *specifications + = &file_context->file_private->specifications; + SpecificationByOffset::iterator spec = specifications->find(data); + if (spec != specifications->end()) { + specification_ = &spec->second; + } else { + // Technically, there's no reason a DW_AT_specification + // couldn't be a forward reference, but supporting that would + // be a lot of work (changing to a two-pass structure), and I + // don't think any producers we care about ever emit such + // things. + cu_context_->reporter->UnknownSpecification(offset_, data); + } + break; + } + default: break; + } +} + +void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString( + enum DwarfAttribute attr, + enum DwarfForm form, + const string &data) { + switch (attr) { + case dwarf2reader::DW_AT_name: { + // Place the name in our global set of strings, and then use the + // string from the set. Even though the assignment looks like a copy, + // all the major std::string implementations use reference counting + // internally, so the effect is to have all our data structures share + // copies of strings whenever possible. + pair<set<string>::iterator, bool> result = + cu_context_->file_context->file_private->common_strings.insert(data); + name_attribute_ = *result.first; + break; + } + default: break; + } +} + +string DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() { + // Find our unqualified name. If the DIE has its own DW_AT_name + // attribute, then use that; otherwise, check our specification. + const string *unqualified_name; + if (name_attribute_.empty() && specification_) + unqualified_name = &specification_->unqualified_name; + else + unqualified_name = &name_attribute_; + + // Find the name of our enclosing context. If we have a + // specification, it's the specification's enclosing context that + // counts; otherwise, use this DIE's context. + const string *enclosing_name; + if (specification_) + enclosing_name = &specification_->enclosing_name; + else + enclosing_name = &parent_context_->name; + + // If this DIE was marked as a declaration, record its names in the + // specification table. + if (declaration_) { + FileContext *file_context = cu_context_->file_context; + Specification spec; + spec.enclosing_name = *enclosing_name; + spec.unqualified_name = *unqualified_name; + file_context->file_private->specifications[offset_] = spec; + } + + // Combine the enclosing name and unqualified name to produce our + // own fully-qualified name. + return cu_context_->language->MakeQualifiedName(*enclosing_name, + *unqualified_name); +} + +// A handler class for DW_TAG_subprogram DIEs. +class DwarfCUToModule::FuncHandler: public GenericDIEHandler { + public: + FuncHandler(CUContext *cu_context, DIEContext *parent_context, + uint64 offset) + : GenericDIEHandler(cu_context, parent_context, offset), + low_pc_(0), high_pc_(0), abstract_origin_(NULL), inline_(false) { } + void ProcessAttributeUnsigned(enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data); + void ProcessAttributeSigned(enum DwarfAttribute attr, + enum DwarfForm form, + int64 data); + void ProcessAttributeReference(enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data); + + bool EndAttributes(); + void Finish(); + + private: + // The fully-qualified name, as derived from name_attribute_, + // specification_, parent_context_. Computed in EndAttributes. + string name_; + uint64 low_pc_, high_pc_; // DW_AT_low_pc, DW_AT_high_pc + const AbstractOrigin* abstract_origin_; + bool inline_; +}; + +void DwarfCUToModule::FuncHandler::ProcessAttributeUnsigned( + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { + switch (attr) { + // If this attribute is present at all --- even if its value is + // DW_INL_not_inlined --- then GCC may cite it as someone else's + // DW_AT_abstract_origin attribute. + case dwarf2reader::DW_AT_inline: inline_ = true; break; + + case dwarf2reader::DW_AT_low_pc: low_pc_ = data; break; + case dwarf2reader::DW_AT_high_pc: high_pc_ = data; break; + default: + GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data); + break; + } +} + +void DwarfCUToModule::FuncHandler::ProcessAttributeSigned( + enum DwarfAttribute attr, + enum DwarfForm form, + int64 data) { + switch (attr) { + // If this attribute is present at all --- even if its value is + // DW_INL_not_inlined --- then GCC may cite it as someone else's + // DW_AT_abstract_origin attribute. + case dwarf2reader::DW_AT_inline: inline_ = true; break; + + default: + break; + } +} + +void DwarfCUToModule::FuncHandler::ProcessAttributeReference( + enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { + switch(attr) { + case dwarf2reader::DW_AT_abstract_origin: { + const AbstractOriginByOffset& origins = + cu_context_->file_context->file_private->origins; + AbstractOriginByOffset::const_iterator origin = origins.find(data); + if (origin != origins.end()) { + abstract_origin_ = &(origin->second); + } else { + cu_context_->reporter->UnknownAbstractOrigin(offset_, data); + } + break; + } + default: + GenericDIEHandler::ProcessAttributeReference(attr, form, data); + break; + } +} + +bool DwarfCUToModule::FuncHandler::EndAttributes() { + // Compute our name, and record a specification, if appropriate. + name_ = ComputeQualifiedName(); + if (name_.empty() && abstract_origin_) { + name_ = abstract_origin_->name; + } + return true; +} + +void DwarfCUToModule::FuncHandler::Finish() { + // Did we collect the information we need? Not all DWARF function + // entries have low and high addresses (for example, inlined + // functions that were never used), but all the ones we're + // interested in cover a non-empty range of bytes. + if (low_pc_ < high_pc_) { + // Create a Module::Function based on the data we've gathered, and + // add it to the functions_ list. + Module::Function *func = new Module::Function; + // Malformed DWARF may omit the name, but all Module::Functions must + // have names. + if (!name_.empty()) { + func->name = name_; + } else { + cu_context_->reporter->UnnamedFunction(offset_); + func->name = "<name omitted>"; + } + func->address = low_pc_; + func->size = high_pc_ - low_pc_; + func->parameter_size = 0; + if (func->address) { + // If the function address is zero this is a sign that this function + // description is just empty debug data and should just be discarded. + cu_context_->functions.push_back(func); + } + } else if (inline_) { + AbstractOrigin origin(name_); + cu_context_->file_context->file_private->origins[offset_] = origin; + } +} + +// A handler for DIEs that contain functions and contribute a +// component to their names: namespaces, classes, etc. +class DwarfCUToModule::NamedScopeHandler: public GenericDIEHandler { + public: + NamedScopeHandler(CUContext *cu_context, DIEContext *parent_context, + uint64 offset) + : GenericDIEHandler(cu_context, parent_context, offset) { } + bool EndAttributes(); + DIEHandler *FindChildHandler(uint64 offset, enum DwarfTag tag, + const AttributeList &attrs); + + private: + DIEContext child_context_; // A context for our children. +}; + +bool DwarfCUToModule::NamedScopeHandler::EndAttributes() { + child_context_.name = ComputeQualifiedName(); + return true; +} + +dwarf2reader::DIEHandler *DwarfCUToModule::NamedScopeHandler::FindChildHandler( + uint64 offset, + enum DwarfTag tag, + const AttributeList &attrs) { + switch (tag) { + case dwarf2reader::DW_TAG_subprogram: + return new FuncHandler(cu_context_, &child_context_, offset); + case dwarf2reader::DW_TAG_namespace: + case dwarf2reader::DW_TAG_class_type: + case dwarf2reader::DW_TAG_structure_type: + case dwarf2reader::DW_TAG_union_type: + return new NamedScopeHandler(cu_context_, &child_context_, offset); + default: + return NULL; + } +} + +void DwarfCUToModule::WarningReporter::CUHeading() { + if (printed_cu_header_) + return; + fprintf(stderr, "%s: in compilation unit '%s' (offset 0x%llx):\n", + filename_.c_str(), cu_name_.c_str(), cu_offset_); + printed_cu_header_ = true; +} + +void DwarfCUToModule::WarningReporter::UnknownSpecification(uint64 offset, + uint64 target) { + CUHeading(); + fprintf(stderr, "%s: the DIE at offset 0x%llx has a DW_AT_specification" + " attribute referring to the die at offset 0x%llx, which either" + " was not marked as a declaration, or comes later in the file\n", + filename_.c_str(), offset, target); +} + +void DwarfCUToModule::WarningReporter::UnknownAbstractOrigin(uint64 offset, + uint64 target) { + CUHeading(); + fprintf(stderr, "%s: the DIE at offset 0x%llx has a DW_AT_abstract_origin" + " attribute referring to the die at offset 0x%llx, which either" + " was not marked as an inline, or comes later in the file\n", + filename_.c_str(), offset, target); +} + +void DwarfCUToModule::WarningReporter::MissingSection(const string &name) { + CUHeading(); + fprintf(stderr, "%s: warning: couldn't find DWARF '%s' section\n", + filename_.c_str(), name.c_str()); +} + +void DwarfCUToModule::WarningReporter::BadLineInfoOffset(uint64 offset) { + CUHeading(); + fprintf(stderr, "%s: warning: line number data offset beyond end" + " of '.debug_line' section\n", + filename_.c_str()); +} + +void DwarfCUToModule::WarningReporter::UncoveredHeading() { + if (printed_unpaired_header_) + return; + CUHeading(); + fprintf(stderr, "%s: warning: skipping unpaired lines/functions:\n", + filename_.c_str()); + printed_unpaired_header_ = true; +} + +void DwarfCUToModule::WarningReporter::UncoveredFunction( + const Module::Function &function) { + if (!uncovered_warnings_enabled_) + return; + UncoveredHeading(); + fprintf(stderr, " function%s: %s\n", + function.size == 0 ? " (zero-length)" : "", + function.name.c_str()); +} + +void DwarfCUToModule::WarningReporter::UncoveredLine(const Module::Line &line) { + if (!uncovered_warnings_enabled_) + return; + UncoveredHeading(); + fprintf(stderr, " line%s: %s:%d at 0x%" PRIx64 "\n", + (line.size == 0 ? " (zero-length)" : ""), + line.file->name.c_str(), line.number, line.address); +} + +void DwarfCUToModule::WarningReporter::UnnamedFunction(uint64 offset) { + CUHeading(); + fprintf(stderr, "%s: warning: function at offset 0x%llx has no name\n", + filename_.c_str(), offset); +} + +DwarfCUToModule::DwarfCUToModule(FileContext *file_context, + LineToModuleFunctor *line_reader, + WarningReporter *reporter) + : line_reader_(line_reader), has_source_line_info_(false) { + cu_context_ = new CUContext(file_context, reporter); + child_context_ = new DIEContext(); +} + +DwarfCUToModule::~DwarfCUToModule() { + delete cu_context_; + delete child_context_; +} + +void DwarfCUToModule::ProcessAttributeSigned(enum DwarfAttribute attr, + enum DwarfForm form, + int64 data) { + switch (attr) { + case dwarf2reader::DW_AT_language: // source language of this CU + SetLanguage(static_cast<DwarfLanguage>(data)); + break; + default: + break; + } +} + +void DwarfCUToModule::ProcessAttributeUnsigned(enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data) { + switch (attr) { + case dwarf2reader::DW_AT_stmt_list: // Line number information. + has_source_line_info_ = true; + source_line_offset_ = data; + break; + case dwarf2reader::DW_AT_language: // source language of this CU + SetLanguage(static_cast<DwarfLanguage>(data)); + break; + default: + break; + } +} + +void DwarfCUToModule::ProcessAttributeString(enum DwarfAttribute attr, + enum DwarfForm form, + const string &data) { + if (attr == dwarf2reader::DW_AT_name) + cu_context_->reporter->SetCUName(data); +} + +bool DwarfCUToModule::EndAttributes() { + return true; +} + +dwarf2reader::DIEHandler *DwarfCUToModule::FindChildHandler( + uint64 offset, + enum DwarfTag tag, + const AttributeList &attrs) { + switch (tag) { + case dwarf2reader::DW_TAG_subprogram: + return new FuncHandler(cu_context_, child_context_, offset); + case dwarf2reader::DW_TAG_namespace: + case dwarf2reader::DW_TAG_class_type: + case dwarf2reader::DW_TAG_structure_type: + case dwarf2reader::DW_TAG_union_type: + return new NamedScopeHandler(cu_context_, child_context_, offset); + default: + return NULL; + } +} + +void DwarfCUToModule::SetLanguage(DwarfLanguage language) { + switch (language) { + case dwarf2reader::DW_LANG_Java: + cu_context_->language = Language::Java; + break; + + // DWARF has no generic language code for assembly language; this is + // what the GNU toolchain uses. + case dwarf2reader::DW_LANG_Mips_Assembler: + cu_context_->language = Language::Assembler; + break; + + // C++ covers so many cases that it probably has some way to cope + // with whatever the other languages throw at us. So make it the + // default. + // + // Objective C and Objective C++ seem to create entries for + // methods whose DW_AT_name values are already fully-qualified: + // "-[Classname method:]". These appear at the top level. + // + // DWARF data for C should never include namespaces or functions + // nested in struct types, but if it ever does, then C++'s + // notation is probably not a bad choice for that. + default: + case dwarf2reader::DW_LANG_ObjC: + case dwarf2reader::DW_LANG_ObjC_plus_plus: + case dwarf2reader::DW_LANG_C: + case dwarf2reader::DW_LANG_C89: + case dwarf2reader::DW_LANG_C99: + case dwarf2reader::DW_LANG_C_plus_plus: + cu_context_->language = Language::CPlusPlus; + break; + } +} + +void DwarfCUToModule::ReadSourceLines(uint64 offset) { + const dwarf2reader::SectionMap §ion_map + = cu_context_->file_context->section_map; + dwarf2reader::SectionMap::const_iterator map_entry + = section_map.find(".debug_line"); + // Mac OS X puts DWARF data in sections whose names begin with "__" + // instead of ".". + if (map_entry == section_map.end()) + map_entry = section_map.find("__debug_line"); + if (map_entry == section_map.end()) { + cu_context_->reporter->MissingSection(".debug_line"); + return; + } + const char *section_start = map_entry->second.first; + uint64 section_length = map_entry->second.second; + if (offset >= section_length) { + cu_context_->reporter->BadLineInfoOffset(offset); + return; + } + (*line_reader_)(section_start + offset, section_length - offset, + cu_context_->file_context->module, &lines_); +} + +namespace { +// Return true if ADDRESS falls within the range of ITEM. +template <class T> +inline bool within(const T &item, Module::Address address) { + // Because Module::Address is unsigned, and unsigned arithmetic + // wraps around, this will be false if ADDRESS falls before the + // start of ITEM, or if it falls after ITEM's end. + return address - item.address < item.size; +} +} + +void DwarfCUToModule::AssignLinesToFunctions() { + vector<Module::Function *> *functions = &cu_context_->functions; + WarningReporter *reporter = cu_context_->reporter; + + // This would be simpler if we assumed that source line entries + // don't cross function boundaries. However, there's no real reason + // to assume that (say) a series of function definitions on the same + // line wouldn't get coalesced into one line number entry. The + // DWARF spec certainly makes no such promises. + // + // So treat the functions and lines as peers, and take the trouble + // to compute their ranges' intersections precisely. In any case, + // the hair here is a constant factor for performance; the + // complexity from here on out is linear. + + // Put both our functions and lines in order by address. + sort(functions->begin(), functions->end(), + Module::Function::CompareByAddress); + sort(lines_.begin(), lines_.end(), Module::Line::CompareByAddress); + + // The last line that we used any piece of. We use this only for + // generating warnings. + const Module::Line *last_line_used = NULL; + + // The last function and line we warned about --- so we can avoid + // doing so more than once. + const Module::Function *last_function_cited = NULL; + const Module::Line *last_line_cited = NULL; + + // Make a single pass through both vectors from lower to higher + // addresses, populating each Function's lines vector with lines + // from our lines_ vector that fall within the function's address + // range. + vector<Module::Function *>::iterator func_it = functions->begin(); + vector<Module::Line>::const_iterator line_it = lines_.begin(); + + Module::Address current; + + // Pointers to the referents of func_it and line_it, or NULL if the + // iterator is at the end of the sequence. + Module::Function *func; + const Module::Line *line; + + // Start current at the beginning of the first line or function, + // whichever is earlier. + if (func_it != functions->end() && line_it != lines_.end()) { + func = *func_it; + line = &*line_it; + current = std::min(func->address, line->address); + } else if (line_it != lines_.end()) { + func = NULL; + line = &*line_it; + current = line->address; + } else if (func_it != functions->end()) { + func = *func_it; + line = NULL; + current = (*func_it)->address; + } else { + return; + } + + while (func || line) { + // This loop has two invariants that hold at the top. + // + // First, at least one of the iterators is not at the end of its + // sequence, and those that are not refer to the earliest + // function or line that contains or starts after CURRENT. + // + // Note that every byte is in one of four states: it is covered + // or not covered by a function, and, independently, it is + // covered or not covered by a line. + // + // The second invariant is that CURRENT refers to a byte whose + // state is different from its predecessor, or it refers to the + // first byte in the address space. In other words, CURRENT is + // always the address of a transition. + // + // Note that, although each iteration advances CURRENT from one + // transition address to the next in each iteration, it might + // not advance the iterators. Suppose we have a function that + // starts with a line, has a gap, and then a second line, and + // suppose that we enter an iteration with CURRENT at the end of + // the first line. The next transition address is the start of + // the second line, after the gap, so the iteration should + // advance CURRENT to that point. At the head of that iteration, + // the invariants require that the line iterator be pointing at + // the second line. But this is also true at the head of the + // next. And clearly, the iteration must not change the function + // iterator. So neither iterator moves. + + // Assert the first invariant (see above). + assert(!func || current < func->address || within(*func, current)); + assert(!line || current < line->address || within(*line, current)); + + // The next transition after CURRENT. + Module::Address next_transition; + + // Figure out which state we're in, add lines or warn, and compute + // the next transition address. + if (func && current >= func->address) { + if (line && current >= line->address) { + // Covered by both a line and a function. + Module::Address func_left = func->size - (current - func->address); + Module::Address line_left = line->size - (current - line->address); + // This may overflow, but things work out. + next_transition = current + std::min(func_left, line_left); + Module::Line l = *line; + l.address = current; + l.size = next_transition - current; + func->lines.push_back(l); + last_line_used = line; + } else { + // Covered by a function, but no line. + if (func != last_function_cited) { + reporter->UncoveredFunction(*func); + last_function_cited = func; + } + if (line && within(*func, line->address)) + next_transition = line->address; + else + // If this overflows, we'll catch it below. + next_transition = func->address + func->size; + } + } else { + if (line && current >= line->address) { + // Covered by a line, but no function. + // + // If GCC emits padding after one function to align the start + // of the next, then it will attribute the padding + // instructions to the last source line of function (to reduce + // the size of the line number info), but omit it from the + // DW_AT_{low,high}_pc range given in .debug_info (since it + // costs nothing to be precise there). If we did use at least + // some of the line we're about to skip, and it ends at the + // start of the next function, then assume this is what + // happened, and don't warn. + if (line != last_line_cited + && !(func + && line == last_line_used + && func->address - line->address == line->size)) { + reporter->UncoveredLine(*line); + last_line_cited = line; + } + if (func && within(*line, func->address)) + next_transition = func->address; + else + // If this overflows, we'll catch it below. + next_transition = line->address + line->size; + } else { + // Covered by neither a function nor a line. By the invariant, + // both func and line begin after CURRENT. The next transition + // is the start of the next function or next line, whichever + // is earliest. + assert (func || line); + if (func && line) + next_transition = std::min(func->address, line->address); + else if (func) + next_transition = func->address; + else + next_transition = line->address; + } + } + + // If a function or line abuts the end of the address space, then + // next_transition may end up being zero, in which case we've completed + // our pass. Handle that here, instead of trying to deal with it in + // each place we compute next_transition. + if (!next_transition) + break; + + // Advance iterators as needed. If lines overlap or functions overlap, + // then we could go around more than once. We don't worry too much + // about what result we produce in that case, just as long as we don't + // hang or crash. + while (func_it != functions->end() + && next_transition >= (*func_it)->address + && !within(**func_it, next_transition)) + func_it++; + func = (func_it != functions->end()) ? *func_it : NULL; + while (line_it != lines_.end() + && next_transition >= line_it->address + && !within(*line_it, next_transition)) + line_it++; + line = (line_it != lines_.end()) ? &*line_it : NULL; + + // We must make progress. + assert(next_transition > current); + current = next_transition; + } +} + +void DwarfCUToModule::Finish() { + // Assembly language files have no function data, and that gives us + // no place to store our line numbers (even though the GNU toolchain + // will happily produce source line info for assembly language + // files). To avoid spurious warnings about lines we can't assign + // to functions, skip CUs in languages that lack functions. + if (!cu_context_->language->HasFunctions()) + return; + + // Read source line info, if we have any. + if (has_source_line_info_) + ReadSourceLines(source_line_offset_); + + vector<Module::Function *> *functions = &cu_context_->functions; + + // Dole out lines to the appropriate functions. + AssignLinesToFunctions(); + + // Add our functions, which now have source lines assigned to them, + // to module_. + cu_context_->file_context->module->AddFunctions(functions->begin(), + functions->end()); + + // Ownership of the function objects has shifted from cu_context to + // the Module. + functions->clear(); +} + +bool DwarfCUToModule::StartCompilationUnit(uint64 offset, + uint8 address_size, + uint8 offset_size, + uint64 cu_length, + uint8 dwarf_version) { + return dwarf_version >= 2; +} + +bool DwarfCUToModule::StartRootDIE(uint64 offset, enum DwarfTag tag, + const AttributeList& attrs) { + // We don't deal with partial compilation units (the only other tag + // likely to be used for root DIE). + return tag == dwarf2reader::DW_TAG_compile_unit; +} + +} // namespace google_breakpad diff --git a/3rdParty/Breakpad/src/common/dwarf_cu_to_module.h b/3rdParty/Breakpad/src/common/dwarf_cu_to_module.h new file mode 100644 index 0000000..ac62846 --- /dev/null +++ b/3rdParty/Breakpad/src/common/dwarf_cu_to_module.h @@ -0,0 +1,277 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2010 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// Add DWARF debugging information to a Breakpad symbol file. This +// file defines the DwarfCUToModule class, which accepts parsed DWARF +// data and populates a google_breakpad::Module with the results; the +// Module can then write its contents as a Breakpad symbol file. + +#ifndef COMMON_LINUX_DWARF_CU_TO_MODULE_H__ +#define COMMON_LINUX_DWARF_CU_TO_MODULE_H__ + +#include <string> + +#include "common/language.h" +#include "common/module.h" +#include "common/dwarf/bytereader.h" +#include "common/dwarf/dwarf2diehandler.h" +#include "common/dwarf/dwarf2reader.h" +#include "common/using_std_string.h" + +namespace google_breakpad { + +using dwarf2reader::AttributeList; +using dwarf2reader::DwarfAttribute; +using dwarf2reader::DwarfForm; +using dwarf2reader::DwarfLanguage; +using dwarf2reader::DwarfTag; + +// Populate a google_breakpad::Module with DWARF debugging information. +// +// An instance of this class can be provided as a handler to a +// dwarf2reader::DIEDispatcher, which can in turn be a handler for a +// dwarf2reader::CompilationUnit DWARF parser. The handler uses the results +// of parsing to populate a google_breakpad::Module with source file, +// function, and source line information. +class DwarfCUToModule: public dwarf2reader::RootDIEHandler { + struct FilePrivate; + public: + + // Information global to the DWARF-bearing file we are processing, + // for use by DwarfCUToModule. Each DwarfCUToModule instance deals + // with a single compilation unit within the file, but information + // global to the whole file is held here. The client is responsible + // for filling it in appropriately (except for the 'file_private' + // field, which the constructor and destructor take care of), and + // then providing it to the DwarfCUToModule instance for each + // compilation unit we process in that file. + struct FileContext { + FileContext(const string &filename_arg, Module *module_arg); + ~FileContext(); + + // The name of this file, for use in error messages. + string filename; + + // A map of this file's sections, used for finding other DWARF + // sections that the .debug_info section may refer to. + dwarf2reader::SectionMap section_map; + + // The Module to which we're contributing definitions. + Module *module; + + // Inter-compilation unit data used internally by the handlers. + FilePrivate *file_private; + }; + + // An abstract base class for functors that handle DWARF line data + // for DwarfCUToModule. DwarfCUToModule could certainly just use + // dwarf2reader::LineInfo itself directly, but decoupling things + // this way makes unit testing a little easier. + class LineToModuleFunctor { + public: + LineToModuleFunctor() { } + virtual ~LineToModuleFunctor() { } + + // Populate MODULE and LINES with source file names and code/line + // mappings, given a pointer to some DWARF line number data + // PROGRAM, and an overestimate of its size. Add no zero-length + // lines to LINES. + virtual void operator()(const char *program, uint64 length, + Module *module, vector<Module::Line> *lines) = 0; + }; + + // The interface DwarfCUToModule uses to report warnings. The member + // function definitions for this class write messages to stderr, but + // you can override them if you'd like to detect or report these + // conditions yourself. + class WarningReporter { + public: + // Warn about problems in the DWARF file FILENAME, in the + // compilation unit at OFFSET. + WarningReporter(const string &filename, uint64 cu_offset) + : filename_(filename), cu_offset_(cu_offset), printed_cu_header_(false), + printed_unpaired_header_(false), + uncovered_warnings_enabled_(false) { } + virtual ~WarningReporter() { } + + // Set the name of the compilation unit we're processing to NAME. + virtual void SetCUName(const string &name) { cu_name_ = name; } + + // Accessor and setter for uncovered_warnings_enabled_. + // UncoveredFunction and UncoveredLine only report a problem if that is + // true. By default, these warnings are disabled, because those + // conditions occur occasionally in healthy code. + virtual bool uncovered_warnings_enabled() const { + return uncovered_warnings_enabled_; + } + virtual void set_uncovered_warnings_enabled(bool value) { + uncovered_warnings_enabled_ = value; + } + + // A DW_AT_specification in the DIE at OFFSET refers to a DIE we + // haven't processed yet, or that wasn't marked as a declaration, + // at TARGET. + virtual void UnknownSpecification(uint64 offset, uint64 target); + + // A DW_AT_abstract_origin in the DIE at OFFSET refers to a DIE we + // haven't processed yet, or that wasn't marked as inline, at TARGET. + virtual void UnknownAbstractOrigin(uint64 offset, uint64 target); + + // We were unable to find the DWARF section named SECTION_NAME. + virtual void MissingSection(const string §ion_name); + + // The CU's DW_AT_stmt_list offset OFFSET is bogus. + virtual void BadLineInfoOffset(uint64 offset); + + // FUNCTION includes code covered by no line number data. + virtual void UncoveredFunction(const Module::Function &function); + + // Line number NUMBER in LINE_FILE, of length LENGTH, includes code + // covered by no function. + virtual void UncoveredLine(const Module::Line &line); + + // The DW_TAG_subprogram DIE at OFFSET has no name specified directly + // in the DIE, nor via a DW_AT_specification or DW_AT_abstract_origin + // link. + virtual void UnnamedFunction(uint64 offset); + + protected: + string filename_; + uint64 cu_offset_; + string cu_name_; + bool printed_cu_header_; + bool printed_unpaired_header_; + bool uncovered_warnings_enabled_; + + private: + // Print a per-CU heading, once. + void CUHeading(); + // Print an unpaired function/line heading, once. + void UncoveredHeading(); + }; + + // Create a DWARF debugging info handler for a compilation unit + // within FILE_CONTEXT. This uses information received from the + // dwarf2reader::CompilationUnit DWARF parser to populate + // FILE_CONTEXT->module. Use LINE_READER to handle the compilation + // unit's line number data. Use REPORTER to report problems with the + // data we find. + DwarfCUToModule(FileContext *file_context, + LineToModuleFunctor *line_reader, + WarningReporter *reporter); + ~DwarfCUToModule(); + + void ProcessAttributeSigned(enum DwarfAttribute attr, + enum DwarfForm form, + int64 data); + void ProcessAttributeUnsigned(enum DwarfAttribute attr, + enum DwarfForm form, + uint64 data); + void ProcessAttributeString(enum DwarfAttribute attr, + enum DwarfForm form, + const string &data); + bool EndAttributes(); + DIEHandler *FindChildHandler(uint64 offset, enum DwarfTag tag, + const AttributeList &attrs); + + // Assign all our source Lines to the Functions that cover their + // addresses, and then add them to module_. + void Finish(); + + bool StartCompilationUnit(uint64 offset, uint8 address_size, + uint8 offset_size, uint64 cu_length, + uint8 dwarf_version); + bool StartRootDIE(uint64 offset, enum DwarfTag tag, + const AttributeList& attrs); + + private: + + // Used internally by the handler. Full definitions are in + // dwarf_cu_to_module.cc. + struct FilePrivate; + struct Specification; + struct CUContext; + struct DIEContext; + class GenericDIEHandler; + class FuncHandler; + class NamedScopeHandler; + + // A map from section offsets to specifications. + typedef map<uint64, Specification> SpecificationByOffset; + + // Set this compilation unit's source language to LANGUAGE. + void SetLanguage(DwarfLanguage language); + + // Read source line information at OFFSET in the .debug_line + // section. Record source files in module_, but record source lines + // in lines_; we apportion them to functions in + // AssignLinesToFunctions. + void ReadSourceLines(uint64 offset); + + // Assign the lines in lines_ to the individual line lists of the + // functions in functions_. (DWARF line information maps an entire + // compilation unit at a time, and gives no indication of which + // lines belong to which functions, beyond their addresses.) + void AssignLinesToFunctions(); + + // The only reason cu_context_ and child_context_ are pointers is + // that we want to keep their definitions private to + // dwarf_cu_to_module.cc, instead of listing them all here. They are + // owned by this DwarfCUToModule: the constructor sets them, and the + // destructor deletes them. + + // The functor to use to handle line number data. + LineToModuleFunctor *line_reader_; + + // This compilation unit's context. + CUContext *cu_context_; + + // A context for our children. + DIEContext *child_context_; + + // True if this compilation unit has source line information. + bool has_source_line_info_; + + // The offset of this compilation unit's line number information in + // the .debug_line section. + uint64 source_line_offset_; + + // The line numbers we have seen thus far. We accumulate these here + // during parsing. Then, in Finish, we call AssignLinesToFunctions + // to dole them out to the appropriate functions. + vector<Module::Line> lines_; +}; + +} // namespace google_breakpad + +#endif // COMMON_LINUX_DWARF_CU_TO_MODULE_H__ diff --git a/3rdParty/Breakpad/src/common/dwarf_line_to_module.cc b/3rdParty/Breakpad/src/common/dwarf_line_to_module.cc new file mode 100644 index 0000000..962848d --- /dev/null +++ b/3rdParty/Breakpad/src/common/dwarf_line_to_module.cc @@ -0,0 +1,138 @@ +// Copyright (c) 2010 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// dwarf_line_to_module.cc: Implementation of DwarfLineToModule class. +// See dwarf_line_to_module.h for details. + +#include <stdio.h> + +#include <string> + +#include "common/dwarf_line_to_module.h" +#include "common/using_std_string.h" + +// Trying to support Windows paths in a reasonable way adds a lot of +// variations to test; it would be better to just put off dealing with +// it until we actually have to deal with DWARF on Windows. + +// Return true if PATH is an absolute path, false if it is relative. +static bool PathIsAbsolute(const string &path) { + return (path.size() >= 1 && path[0] == '/'); +} + +// If PATH is an absolute path, return PATH. If PATH is a relative path, +// treat it as relative to BASE and return the combined path. +static string ExpandPath(const string &path, + const string &base) { + if (PathIsAbsolute(path)) + return path; + return base + "/" + path; +} + +namespace google_breakpad { + +void DwarfLineToModule::DefineDir(const string &name, uint32 dir_num) { + // Directory number zero is reserved to mean the compilation + // directory. Silently ignore attempts to redefine it. + if (dir_num != 0) + directories_[dir_num] = name; +} + +void DwarfLineToModule::DefineFile(const string &name, int32 file_num, + uint32 dir_num, uint64 mod_time, + uint64 length) { + if (file_num == -1) + file_num = ++highest_file_number_; + else if (file_num > highest_file_number_) + highest_file_number_ = file_num; + + string full_name; + if (dir_num != 0) { + DirectoryTable::const_iterator directory_it = directories_.find(dir_num); + if (directory_it != directories_.end()) { + full_name = ExpandPath(name, directory_it->second); + } else { + if (!warned_bad_directory_number_) { + fprintf(stderr, "warning: DWARF line number data refers to undefined" + " directory numbers\n"); + warned_bad_directory_number_ = true; + } + full_name = name; // just treat name as relative + } + } else { + // Directory number zero is the compilation directory; we just report + // relative paths in that case. + full_name = name; + } + + // Find a Module::File object of the given name, and add it to the + // file table. + files_[file_num] = module_->FindFile(full_name); +} + +void DwarfLineToModule::AddLine(uint64 address, uint64 length, + uint32 file_num, uint32 line_num, + uint32 column_num) { + if (length == 0) + return; + + // Clip lines not to extend beyond the end of the address space. + if (address + length < address) + length = -address; + + // Should we omit this line? (See the comments for omitted_line_end_.) + if (address == 0 || address == omitted_line_end_) { + omitted_line_end_ = address + length; + return; + } else { + omitted_line_end_ = 0; + } + + // Find the source file being referred to. + Module::File *file = files_[file_num]; + if (!file) { + if (!warned_bad_file_number_) { + fprintf(stderr, "warning: DWARF line number data refers to " + "undefined file numbers\n"); + warned_bad_file_number_ = true; + } + return; + } + Module::Line line; + line.address = address; + // We set the size when we get the next line or the EndSequence call. + line.size = length; + line.file = file; + line.number = line_num; + lines_->push_back(line); +} + +} // namespace google_breakpad diff --git a/3rdParty/Breakpad/src/common/dwarf_line_to_module.h b/3rdParty/Breakpad/src/common/dwarf_line_to_module.h new file mode 100644 index 0000000..9382e40 --- /dev/null +++ b/3rdParty/Breakpad/src/common/dwarf_line_to_module.h @@ -0,0 +1,182 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2010 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// The DwarfLineToModule class accepts line number information from a +// DWARF parser and adds it to a google_breakpad::Module. The Module +// can write that data out as a Breakpad symbol file. + +#ifndef COMMON_LINUX_DWARF_LINE_TO_MODULE_H +#define COMMON_LINUX_DWARF_LINE_TO_MODULE_H + +#include <string> + +#include "common/module.h" +#include "common/dwarf/dwarf2reader.h" +#include "common/using_std_string.h" + +namespace google_breakpad { + +// A class for producing a vector of google_breakpad::Module::Line +// instances from parsed DWARF line number data. +// +// An instance of this class can be provided as a handler to a +// dwarf2reader::LineInfo DWARF line number information parser. The +// handler accepts source location information from the parser and +// uses it to produce a vector of google_breakpad::Module::Line +// objects, referring to google_breakpad::Module::File objects added +// to a particular google_breakpad::Module. +// +// GNU toolchain omitted sections support: +// ====================================== +// +// Given the right options, the GNU toolchain will omit unreferenced +// functions from the final executable. Unfortunately, when it does so, it +// does not remove the associated portions of the DWARF line number +// program; instead, it gives the DW_LNE_set_address instructions referring +// to the now-deleted code addresses of zero. Given this input, the DWARF +// line parser will call AddLine with a series of lines starting at address +// zero. For example, here is the output from 'readelf -wl' for a program +// with four functions, the first three of which have been omitted: +// +// Line Number Statements: +// Extended opcode 2: set Address to 0x0 +// Advance Line by 14 to 15 +// Copy +// Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 16 +// Special opcode 119: advance Address by 8 to 0xb and Line by 2 to 18 +// Advance PC by 2 to 0xd +// Extended opcode 1: End of Sequence +// +// Extended opcode 2: set Address to 0x0 +// Advance Line by 14 to 15 +// Copy +// Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 16 +// Special opcode 119: advance Address by 8 to 0xb and Line by 2 to 18 +// Advance PC by 2 to 0xd +// Extended opcode 1: End of Sequence +// +// Extended opcode 2: set Address to 0x0 +// Advance Line by 19 to 20 +// Copy +// Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 21 +// Special opcode 76: advance Address by 5 to 0x8 and Line by 1 to 22 +// Advance PC by 2 to 0xa +// Extended opcode 1: End of Sequence +// +// Extended opcode 2: set Address to 0x80483a4 +// Advance Line by 23 to 24 +// Copy +// Special opcode 202: advance Address by 14 to 0x80483b2 and Line by 1 to 25 +// Special opcode 76: advance Address by 5 to 0x80483b7 and Line by 1 to 26 +// Advance PC by 6 to 0x80483bd +// Extended opcode 1: End of Sequence +// +// Instead of collecting runs of lines describing code that is not there, +// we try to recognize and drop them. Since the linker doesn't explicitly +// distinguish references to dropped sections from genuine references to +// code at address zero, we must use a heuristic. We have chosen: +// +// - If a line starts at address zero, omit it. (On the platforms +// breakpad targets, it is extremely unlikely that there will be code +// at address zero.) +// +// - If a line starts immediately after an omitted line, omit it too. +class DwarfLineToModule: public dwarf2reader::LineInfoHandler { + public: + // As the DWARF line info parser passes us line records, add source + // files to MODULE, and add all lines to the end of LINES. LINES + // need not be empty. If the parser hands us a zero-length line, we + // omit it. If the parser hands us a line that extends beyond the + // end of the address space, we clip it. It's up to our client to + // sort out which lines belong to which functions; we don't add them + // to any particular function in MODULE ourselves. + DwarfLineToModule(Module *module, vector<Module::Line> *lines) + : module_(module), + lines_(lines), + highest_file_number_(-1), + omitted_line_end_(0), + warned_bad_file_number_(false), + warned_bad_directory_number_(false) { } + + ~DwarfLineToModule() { } + + void DefineDir(const string &name, uint32 dir_num); + void DefineFile(const string &name, int32 file_num, + uint32 dir_num, uint64 mod_time, + uint64 length); + void AddLine(uint64 address, uint64 length, + uint32 file_num, uint32 line_num, uint32 column_num); + + private: + + typedef std::map<uint32, string> DirectoryTable; + typedef std::map<uint32, Module::File *> FileTable; + + // The module we're contributing debugging info to. Owned by our + // client. + Module *module_; + + // The vector of lines we're accumulating. Owned by our client. + // + // In a Module, as in a breakpad symbol file, lines belong to + // specific functions, but DWARF simply assigns lines to addresses; + // one must infer the line/function relationship using the + // functions' beginning and ending addresses. So we can't add these + // to the appropriate function from module_ until we've read the + // function info as well. Instead, we accumulate lines here, and let + // whoever constructed this sort it all out. + vector<Module::Line> *lines_; + + // A table mapping directory numbers to paths. + DirectoryTable directories_; + + // A table mapping file numbers to Module::File pointers. + FileTable files_; + + // The highest file number we've seen so far, or -1 if we've seen + // none. Used for dynamically defined file numbers. + int32 highest_file_number_; + + // This is the ending address of the last line we omitted, or zero if we + // didn't omit the previous line. It is zero before we have received any + // AddLine calls. + uint64 omitted_line_end_; + + // True if we've warned about: + bool warned_bad_file_number_; // bad file numbers + bool warned_bad_directory_number_; // bad directory numbers +}; + +} // namespace google_breakpad + +#endif // COMMON_LINUX_DWARF_LINE_TO_MODULE_H diff --git a/3rdParty/Breakpad/src/common/language.cc b/3rdParty/Breakpad/src/common/language.cc new file mode 100644 index 0000000..c2fd81f --- /dev/null +++ b/3rdParty/Breakpad/src/common/language.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2010 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// language.cc: Subclasses and singletons for google_breakpad::Language. +// See language.h for details. + +#include "common/language.h" + +namespace google_breakpad { + +// C++ language-specific operations. +class CPPLanguage: public Language { + public: + CPPLanguage() {} + string MakeQualifiedName(const string &parent_name, + const string &name) const { + if (parent_name.empty()) + return name; + else + return parent_name + "::" + name; + } +}; + +CPPLanguage CPPLanguageSingleton; + +// Java language-specific operations. +class JavaLanguage: public Language { + public: + string MakeQualifiedName(const string &parent_name, + const string &name) const { + if (parent_name.empty()) + return name; + else + return parent_name + "." + name; + } +}; + +JavaLanguage JavaLanguageSingleton; + +// Assembler language-specific operations. +class AssemblerLanguage: public Language { + bool HasFunctions() const { return false; } + string MakeQualifiedName(const string &parent_name, + const string &name) const { + return name; + } +}; + +AssemblerLanguage AssemblerLanguageSingleton; + +const Language * const Language::CPlusPlus = &CPPLanguageSingleton; +const Language * const Language::Java = &JavaLanguageSingleton; +const Language * const Language::Assembler = &AssemblerLanguageSingleton; + +} // namespace google_breakpad diff --git a/3rdParty/Breakpad/src/common/language.h b/3rdParty/Breakpad/src/common/language.h new file mode 100644 index 0000000..bbe3033 --- /dev/null +++ b/3rdParty/Breakpad/src/common/language.h @@ -0,0 +1,88 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2010 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// language.h: Define google_breakpad::Language. Instances of +// subclasses of this class provide language-appropriate operations +// for the Breakpad symbol dumper. + +#ifndef COMMON_LINUX_LANGUAGE_H__ +#define COMMON_LINUX_LANGUAGE_H__ + +#include <string> + +#include "common/using_std_string.h" + +namespace google_breakpad { + +// An abstract base class for language-specific operations. We choose +// an instance of a subclass of this when we find the CU's language. +// This class's definitions are appropriate for CUs with no specified +// language. +class Language { + public: + // A base class destructor should be either public and virtual, + // or protected and nonvirtual. + virtual ~Language() {} + + // Return true if this language has functions to which we can assign + // line numbers. (Debugging info for assembly language, for example, + // can have source location information, but does not have functions + // recorded using DW_TAG_subprogram DIEs.) + virtual bool HasFunctions() const { return true; } + + // Construct a fully-qualified, language-appropriate form of NAME, + // given that PARENT_NAME is the name of the construct enclosing + // NAME. If PARENT_NAME is the empty string, then NAME is a + // top-level name. + // + // This API sort of assumes that a fully-qualified name is always + // some simple textual composition of the unqualified name and its + // parent's name, and that we don't need to know anything else about + // the parent or the child (say, their DIEs' tags) to do the job. + // This is true for the languages we support at the moment, and + // keeps things concrete. Perhaps a more refined operation would + // take into account the parent and child DIE types, allow languages + // to use their own data type for complex parent names, etc. But if + // C++ doesn't need all that, who would? + virtual string MakeQualifiedName (const string &parent_name, + const string &name) const = 0; + + // Instances for specific languages. + static const Language * const CPlusPlus, + * const Java, + * const Assembler; +}; + +} // namespace google_breakpad + +#endif // COMMON_LINUX_LANGUAGE_H__ diff --git a/3rdParty/Breakpad/src/common/linux/linux_libc_support.h b/3rdParty/Breakpad/src/common/linux/linux_libc_support.h new file mode 100644 index 0000000..b2f47af --- /dev/null +++ b/3rdParty/Breakpad/src/common/linux/linux_libc_support.h @@ -0,0 +1,95 @@ +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This header provides replacements for libc functions that we need. We if +// call the libc functions directly we risk crashing in the dynamic linker as +// it tries to resolve uncached PLT entries. + +#ifndef CLIENT_LINUX_LINUX_LIBC_SUPPORT_H_ +#define CLIENT_LINUX_LINUX_LIBC_SUPPORT_H_ + +#include <stdint.h> +#include <limits.h> +#include <sys/types.h> + +extern "C" { + +extern size_t my_strlen(const char* s); + +extern int my_strcmp(const char* a, const char* b); + +extern int my_strncmp(const char* a, const char* b, size_t len); + +// Parse a non-negative integer. +// result: (output) the resulting non-negative integer +// s: a NUL terminated string +// Return true iff successful. +extern bool my_strtoui(int* result, const char* s); + +// Return the length of the given, non-negative integer when expressed in base +// 10. +extern unsigned my_int_len(intmax_t i); + +// Convert a non-negative integer to a string +// output: (output) the resulting string is written here. This buffer must be +// large enough to hold the resulting string. Call |my_int_len| to get the +// required length. +// i: the non-negative integer to serialise. +// i_len: the length of the integer in base 10 (see |my_int_len|). +extern void my_itos(char* output, intmax_t i, unsigned i_len); + +extern const char* my_strchr(const char* haystack, char needle); + +extern const char* my_strrchr(const char* haystack, char needle); + +// Read a hex value +// result: (output) the resulting value +// s: a string +// Returns a pointer to the first invalid charactor. +extern const char* my_read_hex_ptr(uintptr_t* result, const char* s); + +extern const char* my_read_decimal_ptr(uintptr_t* result, const char* s); + +extern void my_memset(void* ip, char c, size_t len); + +// The following are considered safe to use in a compromised environment. +// Besides, this gives the compiler an opportunity to optimize their calls. +#define my_memcpy memcpy +#define my_memmove memmove +#define my_memcmp memcmp + +extern size_t my_strlcpy(char* s1, const char* s2, size_t len); + +extern size_t my_strlcat(char* s1, const char* s2, size_t len); + +extern int my_isspace(int ch); + +} // extern "C" + +#endif // CLIENT_LINUX_LINUX_LIBC_SUPPORT_H_ diff --git a/3rdParty/Breakpad/src/common/mac/MachIPC.h b/3rdParty/Breakpad/src/common/mac/MachIPC.h new file mode 100644 index 0000000..52bed59 --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/MachIPC.h @@ -0,0 +1,301 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// MachIPC.h +// +// Some helpful wrappers for using Mach IPC calls + +#ifndef MACH_IPC_H__ +#define MACH_IPC_H__ + +#import <mach/mach.h> +#import <mach/message.h> +#import <servers/bootstrap.h> +#import <sys/types.h> + +#import <CoreServices/CoreServices.h> + +//============================================================================== +// DISCUSSION: +// +// The three main classes of interest are +// +// MachMessage: a wrapper for a mach message of the following form +// mach_msg_header_t +// mach_msg_body_t +// optional descriptors +// optional extra message data +// +// MachReceiveMessage and MachSendMessage subclass MachMessage +// and are used instead of MachMessage which is an abstract base class +// +// ReceivePort: +// Represents a mach port for which we have receive rights +// +// MachPortSender: +// Represents a mach port for which we have send rights +// +// Here's an example to receive a message on a server port: +// +// // This creates our named server port +// ReceivePort receivePort("com.Google.MyService"); +// +// MachReceiveMessage message; +// kern_return_t result = receivePort.WaitForMessage(&message, 0); +// +// if (result == KERN_SUCCESS && message.GetMessageID() == 57) { +// mach_port_t task = message.GetTranslatedPort(0); +// mach_port_t thread = message.GetTranslatedPort(1); +// +// char *messageString = message.GetData(); +// +// printf("message string = %s\n", messageString); +// } +// +// Here is an example of using these classes to send a message to this port: +// +// // send to already named port +// MachPortSender sender("com.Google.MyService"); +// MachSendMessage message(57); // our message ID is 57 +// +// // add some ports to be translated for us +// message.AddDescriptor(mach_task_self()); // our task +// message.AddDescriptor(mach_thread_self()); // this thread +// +// char messageString[] = "Hello server!\n"; +// message.SetData(messageString, strlen(messageString)+1); +// +// kern_return_t result = sender.SendMessage(message, 1000); // timeout 1000ms +// + +namespace google_breakpad { +#define PRINT_MACH_RESULT(result_, message_) \ + printf(message_" %s (%d)\n", mach_error_string(result_), result_ ); + +//============================================================================== +// A wrapper class for mach_msg_port_descriptor_t (with same memory layout) +// with convenient constructors and accessors +class MachMsgPortDescriptor : public mach_msg_port_descriptor_t { + public: + // General-purpose constructor + MachMsgPortDescriptor(mach_port_t in_name, + mach_msg_type_name_t in_disposition) { + name = in_name; + pad1 = 0; + pad2 = 0; + disposition = in_disposition; + type = MACH_MSG_PORT_DESCRIPTOR; + } + + // For passing send rights to a port + MachMsgPortDescriptor(mach_port_t in_name) { + name = in_name; + pad1 = 0; + pad2 = 0; + disposition = MACH_MSG_TYPE_COPY_SEND; + type = MACH_MSG_PORT_DESCRIPTOR; + } + + // Copy constructor + MachMsgPortDescriptor(const MachMsgPortDescriptor& desc) { + name = desc.name; + pad1 = desc.pad1; + pad2 = desc.pad2; + disposition = desc.disposition; + type = desc.type; + } + + mach_port_t GetMachPort() const { + return name; + } + + mach_msg_type_name_t GetDisposition() const { + return disposition; + } + + // For convenience + operator mach_port_t() const { + return GetMachPort(); + } +}; + +//============================================================================== +// MachMessage: a wrapper for a mach message +// (mach_msg_header_t, mach_msg_body_t, extra data) +// +// This considerably simplifies the construction of a message for sending +// and the getting at relevant data and descriptors for the receiver. +// +// Currently the combined size of the descriptors plus data must be +// less than 1024. But as a benefit no memory allocation is necessary. +// +// TODO: could consider adding malloc() support for very large messages +// +// A MachMessage object is used by ReceivePort::WaitForMessage +// and MachPortSender::SendMessage +// +class MachMessage { + public: + + // The receiver of the message can retrieve the raw data this way + u_int8_t *GetData() { + return GetDataLength() > 0 ? GetDataPacket()->data : NULL; + } + + u_int32_t GetDataLength() { + return EndianU32_LtoN(GetDataPacket()->data_length); + } + + // The message ID may be used as a code identifying the type of message + void SetMessageID(int32_t message_id) { + GetDataPacket()->id = EndianU32_NtoL(message_id); + } + + int32_t GetMessageID() { return EndianU32_LtoN(GetDataPacket()->id); } + + // Adds a descriptor (typically a mach port) to be translated + // returns true if successful, otherwise not enough space + bool AddDescriptor(const MachMsgPortDescriptor &desc); + + int GetDescriptorCount() const { return body.msgh_descriptor_count; } + MachMsgPortDescriptor *GetDescriptor(int n); + + // Convenience method which gets the mach port described by the descriptor + mach_port_t GetTranslatedPort(int n); + + // A simple message is one with no descriptors + bool IsSimpleMessage() const { return GetDescriptorCount() == 0; } + + // Sets raw data for the message (returns false if not enough space) + bool SetData(void *data, int32_t data_length); + + protected: + // Consider this an abstract base class - must create an actual instance + // of MachReceiveMessage or MachSendMessage + + MachMessage() { + memset(this, 0, sizeof(MachMessage)); + } + + friend class ReceivePort; + friend class MachPortSender; + + // Represents raw data in our message + struct MessageDataPacket { + int32_t id; // little-endian + int32_t data_length; // little-endian + u_int8_t data[1]; // actual size limited by sizeof(MachMessage) + }; + + MessageDataPacket* GetDataPacket(); + + void SetDescriptorCount(int n); + void SetDescriptor(int n, const MachMsgPortDescriptor &desc); + + // Returns total message size setting msgh_size in the header to this value + mach_msg_size_t CalculateSize(); + + mach_msg_header_t head; + mach_msg_body_t body; + u_int8_t padding[1024]; // descriptors and data may be embedded here +}; + +//============================================================================== +// MachReceiveMessage and MachSendMessage are useful to separate the idea +// of a mach message being sent and being received, and adds increased type +// safety: +// ReceivePort::WaitForMessage() only accepts a MachReceiveMessage +// MachPortSender::SendMessage() only accepts a MachSendMessage + +//============================================================================== +class MachReceiveMessage : public MachMessage { + public: + MachReceiveMessage() : MachMessage() {}; +}; + +//============================================================================== +class MachSendMessage : public MachMessage { + public: + MachSendMessage(int32_t message_id); +}; + +//============================================================================== +// Represents a mach port for which we have receive rights +class ReceivePort { + public: + // Creates a new mach port for receiving messages and registers a name for it + explicit ReceivePort(const char *receive_port_name); + + // Given an already existing mach port, use it. We take ownership of the + // port and deallocate it in our destructor. + explicit ReceivePort(mach_port_t receive_port); + + // Create a new mach port for receiving messages + ReceivePort(); + + ~ReceivePort(); + + // Waits on the mach port until message received or timeout + kern_return_t WaitForMessage(MachReceiveMessage *out_message, + mach_msg_timeout_t timeout); + + // The underlying mach port that we wrap + mach_port_t GetPort() const { return port_; } + + private: + ReceivePort(const ReceivePort&); // disable copy c-tor + + mach_port_t port_; + kern_return_t init_result_; +}; + +//============================================================================== +// Represents a mach port for which we have send rights +class MachPortSender { + public: + // get a port with send rights corresponding to a named registered service + explicit MachPortSender(const char *receive_port_name); + + + // Given an already existing mach port, use it. + explicit MachPortSender(mach_port_t send_port); + + kern_return_t SendMessage(MachSendMessage &message, + mach_msg_timeout_t timeout); + + private: + MachPortSender(const MachPortSender&); // disable copy c-tor + + mach_port_t send_port_; + kern_return_t init_result_; +}; + +} // namespace google_breakpad + +#endif // MACH_IPC_H__ diff --git a/3rdParty/Breakpad/src/common/mac/MachIPC.mm b/3rdParty/Breakpad/src/common/mac/MachIPC.mm new file mode 100644 index 0000000..dc9773f --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/MachIPC.mm @@ -0,0 +1,306 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// MachIPC.mm +// Wrapper for mach IPC calls + +#import <stdio.h> +#import "MachIPC.h" +#include "common/mac/bootstrap_compat.h" + +namespace google_breakpad { +//============================================================================== +MachSendMessage::MachSendMessage(int32_t message_id) : MachMessage() { + head.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0); + + // head.msgh_remote_port = ...; // filled out in MachPortSender::SendMessage() + head.msgh_local_port = MACH_PORT_NULL; + head.msgh_reserved = 0; + head.msgh_id = 0; + + SetDescriptorCount(0); // start out with no descriptors + + SetMessageID(message_id); + SetData(NULL, 0); // client may add data later +} + +//============================================================================== +// returns true if successful +bool MachMessage::SetData(void *data, + int32_t data_length) { + // first check to make sure we have enough space + size_t size = CalculateSize(); + size_t new_size = size + data_length; + + if (new_size > sizeof(MachMessage)) { + return false; // not enough space + } + + GetDataPacket()->data_length = EndianU32_NtoL(data_length); + if (data) memcpy(GetDataPacket()->data, data, data_length); + + CalculateSize(); + + return true; +} + +//============================================================================== +// calculates and returns the total size of the message +// Currently, the entire message MUST fit inside of the MachMessage +// messsage size <= sizeof(MachMessage) +mach_msg_size_t MachMessage::CalculateSize() { + size_t size = sizeof(mach_msg_header_t) + sizeof(mach_msg_body_t); + + // add space for MessageDataPacket + int32_t alignedDataLength = (GetDataLength() + 3) & ~0x3; + size += 2*sizeof(int32_t) + alignedDataLength; + + // add space for descriptors + size += GetDescriptorCount() * sizeof(MachMsgPortDescriptor); + + head.msgh_size = static_cast<mach_msg_size_t>(size); + + return head.msgh_size; +} + +//============================================================================== +MachMessage::MessageDataPacket *MachMessage::GetDataPacket() { + size_t desc_size = sizeof(MachMsgPortDescriptor)*GetDescriptorCount(); + MessageDataPacket *packet = + reinterpret_cast<MessageDataPacket*>(padding + desc_size); + + return packet; +} + +//============================================================================== +void MachMessage::SetDescriptor(int n, + const MachMsgPortDescriptor &desc) { + MachMsgPortDescriptor *desc_array = + reinterpret_cast<MachMsgPortDescriptor*>(padding); + desc_array[n] = desc; +} + +//============================================================================== +// returns true if successful otherwise there was not enough space +bool MachMessage::AddDescriptor(const MachMsgPortDescriptor &desc) { + // first check to make sure we have enough space + int size = CalculateSize(); + size_t new_size = size + sizeof(MachMsgPortDescriptor); + + if (new_size > sizeof(MachMessage)) { + return false; // not enough space + } + + // unfortunately, we need to move the data to allow space for the + // new descriptor + u_int8_t *p = reinterpret_cast<u_int8_t*>(GetDataPacket()); + bcopy(p, p+sizeof(MachMsgPortDescriptor), GetDataLength()+2*sizeof(int32_t)); + + SetDescriptor(GetDescriptorCount(), desc); + SetDescriptorCount(GetDescriptorCount() + 1); + + CalculateSize(); + + return true; +} + +//============================================================================== +void MachMessage::SetDescriptorCount(int n) { + body.msgh_descriptor_count = n; + + if (n > 0) { + head.msgh_bits |= MACH_MSGH_BITS_COMPLEX; + } else { + head.msgh_bits &= ~MACH_MSGH_BITS_COMPLEX; + } +} + +//============================================================================== +MachMsgPortDescriptor *MachMessage::GetDescriptor(int n) { + if (n < GetDescriptorCount()) { + MachMsgPortDescriptor *desc = + reinterpret_cast<MachMsgPortDescriptor*>(padding); + return desc + n; + } + + return nil; +} + +//============================================================================== +mach_port_t MachMessage::GetTranslatedPort(int n) { + if (n < GetDescriptorCount()) { + return GetDescriptor(n)->GetMachPort(); + } + return MACH_PORT_NULL; +} + +#pragma mark - + +//============================================================================== +// create a new mach port for receiving messages and register a name for it +ReceivePort::ReceivePort(const char *receive_port_name) { + mach_port_t current_task = mach_task_self(); + + init_result_ = mach_port_allocate(current_task, + MACH_PORT_RIGHT_RECEIVE, + &port_); + + if (init_result_ != KERN_SUCCESS) + return; + + init_result_ = mach_port_insert_right(current_task, + port_, + port_, + MACH_MSG_TYPE_MAKE_SEND); + + if (init_result_ != KERN_SUCCESS) + return; + + mach_port_t task_bootstrap_port = 0; + init_result_ = task_get_bootstrap_port(current_task, &task_bootstrap_port); + + if (init_result_ != KERN_SUCCESS) + return; + + init_result_ = breakpad::BootstrapRegister( + bootstrap_port, + const_cast<char*>(receive_port_name), + port_); +} + +//============================================================================== +// create a new mach port for receiving messages +ReceivePort::ReceivePort() { + mach_port_t current_task = mach_task_self(); + + init_result_ = mach_port_allocate(current_task, + MACH_PORT_RIGHT_RECEIVE, + &port_); + + if (init_result_ != KERN_SUCCESS) + return; + + init_result_ = mach_port_insert_right(current_task, + port_, + port_, + MACH_MSG_TYPE_MAKE_SEND); +} + +//============================================================================== +// Given an already existing mach port, use it. We take ownership of the +// port and deallocate it in our destructor. +ReceivePort::ReceivePort(mach_port_t receive_port) + : port_(receive_port), + init_result_(KERN_SUCCESS) { +} + +//============================================================================== +ReceivePort::~ReceivePort() { + if (init_result_ == KERN_SUCCESS) + mach_port_deallocate(mach_task_self(), port_); +} + +//============================================================================== +kern_return_t ReceivePort::WaitForMessage(MachReceiveMessage *out_message, + mach_msg_timeout_t timeout) { + if (!out_message) { + return KERN_INVALID_ARGUMENT; + } + + // return any error condition encountered in constructor + if (init_result_ != KERN_SUCCESS) + return init_result_; + + out_message->head.msgh_bits = 0; + out_message->head.msgh_local_port = port_; + out_message->head.msgh_remote_port = MACH_PORT_NULL; + out_message->head.msgh_reserved = 0; + out_message->head.msgh_id = 0; + + mach_msg_option_t options = MACH_RCV_MSG; + if (timeout != MACH_MSG_TIMEOUT_NONE) + options |= MACH_RCV_TIMEOUT; + kern_return_t result = mach_msg(&out_message->head, + options, + 0, + sizeof(MachMessage), + port_, + timeout, // timeout in ms + MACH_PORT_NULL); + + return result; +} + +#pragma mark - + +//============================================================================== +// get a port with send rights corresponding to a named registered service +MachPortSender::MachPortSender(const char *receive_port_name) { + mach_port_t task_bootstrap_port = 0; + init_result_ = task_get_bootstrap_port(mach_task_self(), + &task_bootstrap_port); + + if (init_result_ != KERN_SUCCESS) + return; + + init_result_ = bootstrap_look_up(task_bootstrap_port, + const_cast<char*>(receive_port_name), + &send_port_); +} + +//============================================================================== +MachPortSender::MachPortSender(mach_port_t send_port) + : send_port_(send_port), + init_result_(KERN_SUCCESS) { +} + +//============================================================================== +kern_return_t MachPortSender::SendMessage(MachSendMessage &message, + mach_msg_timeout_t timeout) { + if (message.head.msgh_size == 0) { + return KERN_INVALID_VALUE; // just for safety -- never should occur + }; + + if (init_result_ != KERN_SUCCESS) + return init_result_; + + message.head.msgh_remote_port = send_port_; + + kern_return_t result = mach_msg(&message.head, + MACH_SEND_MSG | MACH_SEND_TIMEOUT, + message.head.msgh_size, + 0, + MACH_PORT_NULL, + timeout, // timeout in ms + MACH_PORT_NULL); + + return result; +} + +} // namespace google_breakpad diff --git a/3rdParty/Breakpad/src/common/mac/bootstrap_compat.cc b/3rdParty/Breakpad/src/common/mac/bootstrap_compat.cc new file mode 100644 index 0000000..d875d95 --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/bootstrap_compat.cc @@ -0,0 +1,42 @@ +// Copyright (c) 2012, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "common/mac/bootstrap_compat.h" + +namespace breakpad { + +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +kern_return_t BootstrapRegister(mach_port_t bp, + name_t service_name, + mach_port_t sp) { + return bootstrap_register(bp, service_name, sp); +} +#pragma GCC diagnostic warning "-Wdeprecated-declarations" + +} // namesapce breakpad diff --git a/3rdParty/Breakpad/src/common/mac/bootstrap_compat.h b/3rdParty/Breakpad/src/common/mac/bootstrap_compat.h new file mode 100644 index 0000000..8ca7357 --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/bootstrap_compat.h @@ -0,0 +1,54 @@ +// Copyright (c) 2012, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef COMMON_MAC_BOOTSTRAP_COMPAT_H_ +#define COMMON_MAC_BOOTSTRAP_COMPAT_H_ + +#include <servers/bootstrap.h> + +namespace breakpad { + +// Wrapper for bootstrap_register to avoid deprecation warnings. +// +// In 10.6, it's possible to call bootstrap_check_in as the one-stop-shop for +// handling what bootstrap_register is used for. In 10.5, bootstrap_check_in +// can't check in a service whose name has not yet been registered, despite +// bootstrap_register being marked as deprecated in that OS release. Breakpad +// needs to register new service names, and in 10.5, calling +// bootstrap_register is the only way to achieve that. Attempts to call +// bootstrap_check_in for a new service name on 10.5 will result in +// BOOTSTRAP_UNKNOWN_SERVICE being returned rather than registration of the +// new service name. +kern_return_t BootstrapRegister(mach_port_t bp, + name_t service_name, + mach_port_t sp); + +} // namespace breakpad + +#endif // COMMON_MAC_BOOTSTRAP_COMPAT_H_ diff --git a/3rdParty/Breakpad/src/common/mac/byteswap.h b/3rdParty/Breakpad/src/common/mac/byteswap.h new file mode 100644 index 0000000..a5d745b --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/byteswap.h @@ -0,0 +1,48 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2010, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jim@mozilla.com> <jimb@red-bean.com> + +// byteswap.h: Overloaded functions for conveniently byteswapping values. + +#ifndef COMMON_MAC_BYTESWAP_H_ +#define COMMON_MAC_BYTESWAP_H_ + +#include <libkern/OSByteOrder.h> + +static inline uint16_t ByteSwap(uint16_t v) { return OSSwapInt16(v); } +static inline uint32_t ByteSwap(uint32_t v) { return OSSwapInt32(v); } +static inline uint64_t ByteSwap(uint64_t v) { return OSSwapInt64(v); } +static inline int16_t ByteSwap(int16_t v) { return OSSwapInt16(v); } +static inline int32_t ByteSwap(int32_t v) { return OSSwapInt32(v); } +static inline int64_t ByteSwap(int64_t v) { return OSSwapInt64(v); } + +#endif // COMMON_MAC_BYTESWAP_H_ diff --git a/3rdParty/Breakpad/src/common/mac/dump_syms.h b/3rdParty/Breakpad/src/common/mac/dump_syms.h new file mode 100644 index 0000000..0e2f464 --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/dump_syms.h @@ -0,0 +1,172 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2011, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// dump_syms.h: Declaration of google_breakpad::DumpSymbols, a class for +// reading debugging information from Mach-O files and writing it out as a +// Breakpad symbol file. + +#include <Foundation/Foundation.h> +#include <mach-o/loader.h> +#include <stdio.h> +#include <stdlib.h> + +#include <ostream> +#include <string> +#include <vector> + +#include "common/byte_cursor.h" +#include "common/mac/macho_reader.h" +#include "common/module.h" + +namespace google_breakpad { + +class DumpSymbols { + public: + DumpSymbols() + : input_pathname_(), + object_filename_(), + contents_(), + selected_object_file_(), + selected_object_name_() { } + ~DumpSymbols() { + [input_pathname_ release]; + [object_filename_ release]; + [contents_ release]; + } + + // Prepare to read debugging information from |filename|. |filename| may be + // the name of a universal binary, a Mach-O file, or a dSYM bundle + // containing either of the above. On success, return true; if there is a + // problem reading |filename|, report it and return false. + // + // (This class uses NSString for filenames and related values, + // because the Mac Foundation framework seems to support + // filename-related operations more fully on NSString values.) + bool Read(NSString *filename); + + // If this dumper's file includes an object file for |cpu_type| and + // |cpu_subtype|, then select that object file for dumping, and return + // true. Otherwise, return false, and leave this dumper's selected + // architecture unchanged. + // + // By default, if this dumper's file contains only one object file, then + // the dumper will dump those symbols; and if it contains more than one + // object file, then the dumper will dump the object file whose + // architecture matches that of this dumper program. + bool SetArchitecture(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype); + + // If this dumper's file includes an object file for |arch_name|, then select + // that object file for dumping, and return true. Otherwise, return false, + // and leave this dumper's selected architecture unchanged. + // + // By default, if this dumper's file contains only one object file, then + // the dumper will dump those symbols; and if it contains more than one + // object file, then the dumper will dump the object file whose + // architecture matches that of this dumper program. + bool SetArchitecture(const std::string &arch_name); + + // Return a pointer to an array of 'struct fat_arch' structures, + // describing the object files contained in this dumper's file. Set + // *|count| to the number of elements in the array. The returned array is + // owned by this DumpSymbols instance. + // + // If there are no available architectures, this function + // may return NULL. + const struct fat_arch *AvailableArchitectures(size_t *count) { + *count = object_files_.size(); + if (object_files_.size() > 0) + return &object_files_[0]; + return NULL; + } + + // Read the selected object file's debugging information, and write it out to + // |stream|. Write the CFI section if |cfi| is true. Return true on success; + // if an error occurs, report it and return false. + bool WriteSymbolFile(std::ostream &stream, bool cfi); + + private: + // Used internally. + class DumperLineToModule; + class LoadCommandDumper; + + // Return an identifier string for the file this DumpSymbols is dumping. + std::string Identifier(); + + // Read debugging information from |dwarf_sections|, which was taken from + // |macho_reader|, and add it to |module|. On success, return true; + // on failure, report the problem and return false. + bool ReadDwarf(google_breakpad::Module *module, + const mach_o::Reader &macho_reader, + const mach_o::SectionMap &dwarf_sections) const; + + // Read DWARF CFI or .eh_frame data from |section|, belonging to + // |macho_reader|, and record it in |module|. If |eh_frame| is true, + // then the data is .eh_frame-format data; otherwise, it is standard DWARF + // .debug_frame data. On success, return true; on failure, report + // the problem and return false. + bool ReadCFI(google_breakpad::Module *module, + const mach_o::Reader &macho_reader, + const mach_o::Section §ion, + bool eh_frame) const; + + // The name of the file or bundle whose symbols this will dump. + // This is the path given to Read, for use in error messages. + NSString *input_pathname_; + + // The name of the file this DumpSymbols will actually read debugging + // information from. Normally, this is the same as input_pathname_, but if + // filename refers to a dSYM bundle, then this is the resource file + // within that bundle. + NSString *object_filename_; + + // The complete contents of object_filename_, mapped into memory. + NSData *contents_; + + // A vector of fat_arch structures describing the object files + // object_filename_ contains. If object_filename_ refers to a fat binary, + // this may have more than one element; if it refers to a Mach-O file, this + // has exactly one element. + vector<struct fat_arch> object_files_; + + // The object file in object_files_ selected to dump, or NULL if + // SetArchitecture hasn't been called yet. + const struct fat_arch *selected_object_file_; + + // A string that identifies the selected object file, for use in error + // messages. This is usually object_filename_, but if that refers to a + // fat binary, it includes an indication of the particular architecture + // within that binary. + string selected_object_name_; +}; + +} // namespace google_breakpad diff --git a/3rdParty/Breakpad/src/common/mac/dump_syms.mm b/3rdParty/Breakpad/src/common/mac/dump_syms.mm new file mode 100644 index 0000000..9783514 --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/dump_syms.mm @@ -0,0 +1,496 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2011, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// dump_syms.mm: Create a symbol file for use with minidumps + +#include "common/mac/dump_syms.h" + +#include <Foundation/Foundation.h> +#include <mach-o/arch.h> +#include <mach-o/fat.h> +#include <stdio.h> + +#include <ostream> +#include <string> +#include <vector> + +#include "common/dwarf/bytereader-inl.h" +#include "common/dwarf/dwarf2reader.h" +#include "common/dwarf_cfi_to_module.h" +#include "common/dwarf_cu_to_module.h" +#include "common/dwarf_line_to_module.h" +#include "common/mac/file_id.h" +#include "common/mac/macho_reader.h" +#include "common/module.h" +#include "common/stabs_reader.h" +#include "common/stabs_to_module.h" + +#ifndef CPU_TYPE_ARM +#define CPU_TYPE_ARM (static_cast<cpu_type_t>(12)) +#endif // CPU_TYPE_ARM + +using dwarf2reader::ByteReader; +using google_breakpad::DwarfCUToModule; +using google_breakpad::DwarfLineToModule; +using google_breakpad::FileID; +using google_breakpad::mach_o::FatReader; +using google_breakpad::mach_o::Section; +using google_breakpad::mach_o::Segment; +using google_breakpad::Module; +using google_breakpad::StabsReader; +using google_breakpad::StabsToModule; +using std::make_pair; +using std::pair; +using std::string; +using std::vector; + +namespace google_breakpad { + +bool DumpSymbols::Read(NSString *filename) { + if (![[NSFileManager defaultManager] fileExistsAtPath:filename]) { + fprintf(stderr, "Object file does not exist: %s\n", + [filename fileSystemRepresentation]); + return false; + } + + input_pathname_ = [filename retain]; + + // Does this filename refer to a dSYM bundle? + NSBundle *bundle = [NSBundle bundleWithPath:input_pathname_]; + + if (bundle) { + // Filenames referring to bundles usually have names of the form + // "<basename>.dSYM"; however, if the user has specified a wrapper + // suffix (the WRAPPER_SUFFIX and WRAPPER_EXTENSION build settings), + // then the name may have the form "<basename>.<extension>.dSYM". In + // either case, the resource name for the file containing the DWARF + // info within the bundle is <basename>. + // + // Since there's no way to tell how much to strip off, remove one + // extension at a time, and use the first one that + // pathForResource:ofType:inDirectory likes. + NSString *base_name = [input_pathname_ lastPathComponent]; + NSString *dwarf_resource; + + do { + NSString *new_base_name = [base_name stringByDeletingPathExtension]; + + // If stringByDeletingPathExtension returned the name unchanged, then + // there's nothing more for us to strip off --- lose. + if ([new_base_name isEqualToString:base_name]) { + fprintf(stderr, "Unable to find DWARF-bearing file in bundle: %s\n", + [input_pathname_ fileSystemRepresentation]); + return false; + } + + // Take the shortened result as our new base_name. + base_name = new_base_name; + + // Try to find a DWARF resource in the bundle under the new base_name. + dwarf_resource = [bundle pathForResource:base_name + ofType:nil inDirectory:@"DWARF"]; + } while (!dwarf_resource); + + object_filename_ = [dwarf_resource retain]; + } else { + object_filename_ = [input_pathname_ retain]; + } + + // Read the file's contents into memory. + // + // The documentation for dataWithContentsOfMappedFile says: + // + // Because of file mapping restrictions, this method should only be + // used if the file is guaranteed to exist for the duration of the + // data object’s existence. It is generally safer to use the + // dataWithContentsOfFile: method. + // + // I gather this means that OS X doesn't have (or at least, that method + // doesn't use) a form of mapping like Linux's MAP_PRIVATE, where the + // process appears to get its own copy of the data, and changes to the + // file don't affect memory and vice versa). + NSError *error; + contents_ = [NSData dataWithContentsOfFile:object_filename_ + options:0 + error:&error]; + if (!contents_) { + fprintf(stderr, "Error reading object file: %s: %s\n", + [object_filename_ fileSystemRepresentation], + [[error localizedDescription] UTF8String]); + return false; + } + [contents_ retain]; + + // Get the list of object files present in the file. + FatReader::Reporter fat_reporter([object_filename_ + fileSystemRepresentation]); + FatReader fat_reader(&fat_reporter); + if (!fat_reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes]), + [contents_ length])) { + return false; + } + + // Get our own copy of fat_reader's object file list. + size_t object_files_count; + const struct fat_arch *object_files = + fat_reader.object_files(&object_files_count); + if (object_files_count == 0) { + fprintf(stderr, "Fat binary file contains *no* architectures: %s\n", + [object_filename_ fileSystemRepresentation]); + return false; + } + object_files_.resize(object_files_count); + memcpy(&object_files_[0], object_files, + sizeof(struct fat_arch) * object_files_count); + + return true; +} + +bool DumpSymbols::SetArchitecture(cpu_type_t cpu_type, + cpu_subtype_t cpu_subtype) { + // Find the best match for the architecture the user requested. + const struct fat_arch *best_match + = NXFindBestFatArch(cpu_type, cpu_subtype, &object_files_[0], + static_cast<uint32_t>(object_files_.size())); + if (!best_match) return false; + + // Record the selected object file. + selected_object_file_ = best_match; + return true; +} + +bool DumpSymbols::SetArchitecture(const std::string &arch_name) { + bool arch_set = false; + const NXArchInfo *arch_info = NXGetArchInfoFromName(arch_name.c_str()); + if (arch_info) { + arch_set = SetArchitecture(arch_info->cputype, arch_info->cpusubtype); + } + return arch_set; +} + +string DumpSymbols::Identifier() { + FileID file_id([object_filename_ fileSystemRepresentation]); + unsigned char identifier_bytes[16]; + cpu_type_t cpu_type = selected_object_file_->cputype; + if (!file_id.MachoIdentifier(cpu_type, identifier_bytes)) { + fprintf(stderr, "Unable to calculate UUID of mach-o binary %s!\n", + [object_filename_ fileSystemRepresentation]); + return ""; + } + + char identifier_string[40]; + FileID::ConvertIdentifierToString(identifier_bytes, identifier_string, + sizeof(identifier_string)); + + string compacted(identifier_string); + for(size_t i = compacted.find('-'); i != string::npos; + i = compacted.find('-', i)) + compacted.erase(i, 1); + + return compacted; +} + +// A line-to-module loader that accepts line number info parsed by +// dwarf2reader::LineInfo and populates a Module and a line vector +// with the results. +class DumpSymbols::DumperLineToModule: + public DwarfCUToModule::LineToModuleFunctor { + public: + // Create a line-to-module converter using BYTE_READER. + DumperLineToModule(dwarf2reader::ByteReader *byte_reader) + : byte_reader_(byte_reader) { } + void operator()(const char *program, uint64 length, + Module *module, vector<Module::Line> *lines) { + DwarfLineToModule handler(module, lines); + dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler); + parser.Start(); + } + private: + dwarf2reader::ByteReader *byte_reader_; // WEAK +}; + +bool DumpSymbols::ReadDwarf(google_breakpad::Module *module, + const mach_o::Reader &macho_reader, + const mach_o::SectionMap &dwarf_sections) const { + // Build a byte reader of the appropriate endianness. + ByteReader byte_reader(macho_reader.big_endian() + ? dwarf2reader::ENDIANNESS_BIG + : dwarf2reader::ENDIANNESS_LITTLE); + + // Construct a context for this file. + DwarfCUToModule::FileContext file_context(selected_object_name_, + module); + + // Build a dwarf2reader::SectionMap from our mach_o::SectionMap. + for (mach_o::SectionMap::const_iterator it = dwarf_sections.begin(); + it != dwarf_sections.end(); it++) { + file_context.section_map[it->first] = + make_pair(reinterpret_cast<const char *>(it->second.contents.start), + it->second.contents.Size()); + } + + // Find the __debug_info section. + std::pair<const char *, uint64> debug_info_section + = file_context.section_map["__debug_info"]; + // There had better be a __debug_info section! + if (!debug_info_section.first) { + fprintf(stderr, "%s: __DWARF segment of file has no __debug_info section\n", + selected_object_name_.c_str()); + return false; + } + + // Build a line-to-module loader for the root handler to use. + DumperLineToModule line_to_module(&byte_reader); + + // Walk the __debug_info section, one compilation unit at a time. + uint64 debug_info_length = debug_info_section.second; + for (uint64 offset = 0; offset < debug_info_length;) { + // Make a handler for the root DIE that populates MODULE with the + // debug info. + DwarfCUToModule::WarningReporter reporter(selected_object_name_, + offset); + DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter); + // Make a Dwarf2Handler that drives our DIEHandler. + dwarf2reader::DIEDispatcher die_dispatcher(&root_handler); + // Make a DWARF parser for the compilation unit at OFFSET. + dwarf2reader::CompilationUnit dwarf_reader(file_context.section_map, + offset, + &byte_reader, + &die_dispatcher); + // Process the entire compilation unit; get the offset of the next. + offset += dwarf_reader.Start(); + } + + return true; +} + +bool DumpSymbols::ReadCFI(google_breakpad::Module *module, + const mach_o::Reader &macho_reader, + const mach_o::Section §ion, + bool eh_frame) const { + // Find the appropriate set of register names for this file's + // architecture. + vector<string> register_names; + switch (macho_reader.cpu_type()) { + case CPU_TYPE_X86: + register_names = DwarfCFIToModule::RegisterNames::I386(); + break; + case CPU_TYPE_X86_64: + register_names = DwarfCFIToModule::RegisterNames::X86_64(); + break; + case CPU_TYPE_ARM: + register_names = DwarfCFIToModule::RegisterNames::ARM(); + break; + default: { + const NXArchInfo *arch = + NXGetArchInfoFromCpuType(macho_reader.cpu_type(), + macho_reader.cpu_subtype()); + fprintf(stderr, "%s: cannot convert DWARF call frame information for ", + selected_object_name_.c_str()); + if (arch) + fprintf(stderr, "architecture '%s'", arch->name); + else + fprintf(stderr, "architecture %d,%d", + macho_reader.cpu_type(), macho_reader.cpu_subtype()); + fprintf(stderr, " to Breakpad symbol file: no register name table\n"); + return false; + } + } + + // Find the call frame information and its size. + const char *cfi = reinterpret_cast<const char *>(section.contents.start); + size_t cfi_size = section.contents.Size(); + + // Plug together the parser, handler, and their entourages. + DwarfCFIToModule::Reporter module_reporter(selected_object_name_, + section.section_name); + DwarfCFIToModule handler(module, register_names, &module_reporter); + dwarf2reader::ByteReader byte_reader(macho_reader.big_endian() ? + dwarf2reader::ENDIANNESS_BIG : + dwarf2reader::ENDIANNESS_LITTLE); + byte_reader.SetAddressSize(macho_reader.bits_64() ? 8 : 4); + // At the moment, according to folks at Apple and some cursory + // investigation, Mac OS X only uses DW_EH_PE_pcrel-based pointers, so + // this is the only base address the CFI parser will need. + byte_reader.SetCFIDataBase(section.address, cfi); + + dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(selected_object_name_, + section.section_name); + dwarf2reader::CallFrameInfo parser(cfi, cfi_size, + &byte_reader, &handler, &dwarf_reporter, + eh_frame); + parser.Start(); + return true; +} + +// A LoadCommandHandler that loads whatever debugging data it finds into a +// Module. +class DumpSymbols::LoadCommandDumper: + public mach_o::Reader::LoadCommandHandler { + public: + // Create a load command dumper handling load commands from READER's + // file, and adding data to MODULE. + LoadCommandDumper(const DumpSymbols &dumper, + google_breakpad::Module *module, + const mach_o::Reader &reader) + : dumper_(dumper), module_(module), reader_(reader) { } + + bool SegmentCommand(const mach_o::Segment &segment); + bool SymtabCommand(const ByteBuffer &entries, const ByteBuffer &strings); + + private: + const DumpSymbols &dumper_; + google_breakpad::Module *module_; // WEAK + const mach_o::Reader &reader_; +}; + +bool DumpSymbols::LoadCommandDumper::SegmentCommand(const Segment &segment) { + mach_o::SectionMap section_map; + if (!reader_.MapSegmentSections(segment, §ion_map)) + return false; + + if (segment.name == "__TEXT") { + module_->SetLoadAddress(segment.vmaddr); + mach_o::SectionMap::const_iterator eh_frame = + section_map.find("__eh_frame"); + if (eh_frame != section_map.end()) { + // If there is a problem reading this, don't treat it as a fatal error. + dumper_.ReadCFI(module_, reader_, eh_frame->second, true); + } + return true; + } + + if (segment.name == "__DWARF") { + if (!dumper_.ReadDwarf(module_, reader_, section_map)) + return false; + mach_o::SectionMap::const_iterator debug_frame + = section_map.find("__debug_frame"); + if (debug_frame != section_map.end()) { + // If there is a problem reading this, don't treat it as a fatal error. + dumper_.ReadCFI(module_, reader_, debug_frame->second, false); + } + } + + return true; +} + +bool DumpSymbols::LoadCommandDumper::SymtabCommand(const ByteBuffer &entries, + const ByteBuffer &strings) { + StabsToModule stabs_to_module(module_); + // Mac OS X STABS are never "unitized", and the size of the 'value' field + // matches the address size of the executable. + StabsReader stabs_reader(entries.start, entries.Size(), + strings.start, strings.Size(), + reader_.big_endian(), + reader_.bits_64() ? 8 : 4, + true, + &stabs_to_module); + if (!stabs_reader.Process()) + return false; + stabs_to_module.Finalize(); + return true; +} + +bool DumpSymbols::WriteSymbolFile(std::ostream &stream, bool cfi) { + // Select an object file, if SetArchitecture hasn't been called to set one + // explicitly. + if (!selected_object_file_) { + // If there's only one architecture, that's the one. + if (object_files_.size() == 1) + selected_object_file_ = &object_files_[0]; + else { + // Look for an object file whose architecture matches our own. + const NXArchInfo *local_arch = NXGetLocalArchInfo(); + if (!SetArchitecture(local_arch->cputype, local_arch->cpusubtype)) { + fprintf(stderr, "%s: object file contains more than one" + " architecture, none of which match the current" + " architecture; specify an architecture explicitly" + " with '-a ARCH' to resolve the ambiguity\n", + [object_filename_ fileSystemRepresentation]); + return false; + } + } + } + + assert(selected_object_file_); + + // Find the name of the selected file's architecture, to appear in + // the MODULE record and in error messages. + const NXArchInfo *selected_arch_info + = NXGetArchInfoFromCpuType(selected_object_file_->cputype, + selected_object_file_->cpusubtype); + + const char *selected_arch_name = selected_arch_info->name; + if (strcmp(selected_arch_name, "i386") == 0) + selected_arch_name = "x86"; + + // Produce a name to use in error messages that includes the + // filename, and the architecture, if there is more than one. + selected_object_name_ = [object_filename_ UTF8String]; + if (object_files_.size() > 1) { + selected_object_name_ += ", architecture "; + selected_object_name_ + selected_arch_name; + } + + // Compute a module name, to appear in the MODULE record. + NSString *module_name = [object_filename_ lastPathComponent]; + + // Choose an identifier string, to appear in the MODULE record. + string identifier = Identifier(); + if (identifier.empty()) + return false; + identifier += "0"; + + // Create a module to hold the debugging information. + Module module([module_name UTF8String], "mac", selected_arch_name, + identifier); + + // Parse the selected object file. + mach_o::Reader::Reporter reporter(selected_object_name_); + mach_o::Reader reader(&reporter); + if (!reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes]) + + selected_object_file_->offset, + selected_object_file_->size, + selected_object_file_->cputype, + selected_object_file_->cpusubtype)) + return false; + + // Walk its load commands, and deal with whatever is there. + LoadCommandDumper load_command_dumper(*this, &module, reader); + if (!reader.WalkLoadCommands(&load_command_dumper)) + return false; + + return module.Write(stream, cfi); +} + +} // namespace google_breakpad diff --git a/3rdParty/Breakpad/src/common/mac/file_id.cc b/3rdParty/Breakpad/src/common/mac/file_id.cc new file mode 100644 index 0000000..50502e4 --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/file_id.cc @@ -0,0 +1,101 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// file_id.cc: Return a unique identifier for a file +// +// See file_id.h for documentation +// +// Author: Dan Waylonis + +#include <fcntl.h> +#include <string.h> +#include <unistd.h> + +#include "common/mac/file_id.h" +#include "common/mac/macho_id.h" + +using MacFileUtilities::MachoID; + +namespace google_breakpad { + +FileID::FileID(const char *path) { + strlcpy(path_, path, sizeof(path_)); +} + +bool FileID::FileIdentifier(unsigned char identifier[16]) { + int fd = open(path_, O_RDONLY); + if (fd == -1) + return false; + + MD5Context md5; + MD5Init(&md5); + + // Read 4k x 2 bytes at a time. This is faster than just 4k bytes, but + // doesn't seem to be an unreasonable size for the stack. + unsigned char buffer[4096 * 2]; + size_t buffer_size = sizeof(buffer); + while ((buffer_size = read(fd, buffer, buffer_size) > 0)) { + MD5Update(&md5, buffer, buffer_size); + } + + close(fd); + MD5Final(identifier, &md5); + + return true; +} + +bool FileID::MachoIdentifier(int cpu_type, unsigned char identifier[16]) { + MachoID macho(path_); + + if (macho.UUIDCommand(cpu_type, identifier)) + return true; + + return macho.MD5(cpu_type, identifier); +} + +// static +void FileID::ConvertIdentifierToString(const unsigned char identifier[16], + char *buffer, int buffer_length) { + int buffer_idx = 0; + for (int idx = 0; (buffer_idx < buffer_length) && (idx < 16); ++idx) { + int hi = (identifier[idx] >> 4) & 0x0F; + int lo = (identifier[idx]) & 0x0F; + + if (idx == 4 || idx == 6 || idx == 8 || idx == 10) + buffer[buffer_idx++] = '-'; + + buffer[buffer_idx++] = (hi >= 10) ? 'A' + hi - 10 : '0' + hi; + buffer[buffer_idx++] = (lo >= 10) ? 'A' + lo - 10 : '0' + lo; + } + + // NULL terminate + buffer[(buffer_idx < buffer_length) ? buffer_idx : buffer_idx - 1] = 0; +} + +} // namespace google_breakpad diff --git a/3rdParty/Breakpad/src/common/mac/file_id.h b/3rdParty/Breakpad/src/common/mac/file_id.h new file mode 100644 index 0000000..eb06b0d --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/file_id.h @@ -0,0 +1,78 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// file_id.h: Return a unique identifier for a file +// +// Author: Dan Waylonis + +#ifndef COMMON_MAC_FILE_ID_H__ +#define COMMON_MAC_FILE_ID_H__ + +#include <limits.h> + +namespace google_breakpad { + +class FileID { + public: + FileID(const char *path); + ~FileID() {}; + + // Load the identifier for the file path specified in the constructor into + // |identifier|. Return false if the identifier could not be created for the + // file. + // The current implementation will return the MD5 hash of the file's bytes. + bool FileIdentifier(unsigned char identifier[16]); + + // Treat the file as a mach-o file that will contain one or more archicture. + // Accepted values for |cpu_type| (e.g., CPU_TYPE_X86 or CPU_TYPE_POWERPC) + // are listed in /usr/include/mach/machine.h. + // If |cpu_type| is 0, then the native cpu type is used. + // Returns false if opening the file failed or if the |cpu_type| is not + // present in the file. + // Return the unique identifier in |identifier|. + // The current implementation will look for the (in order of priority): + // LC_UUID, LC_ID_DYLIB, or MD5 hash of the given |cpu_type|. + bool MachoIdentifier(int cpu_type, unsigned char identifier[16]); + + // Convert the |identifier| data to a NULL terminated string. The string will + // be formatted as a UUID (e.g., 22F065BB-FC9C-49F7-80FE-26A7CEBD7BCE). + // The |buffer| should be at least 37 bytes long to receive all of the data + // and termination. Shorter buffers will contain truncated data. + static void ConvertIdentifierToString(const unsigned char identifier[16], + char *buffer, int buffer_length); + + private: + // Storage for the path specified + char path_[PATH_MAX]; +}; + +} // namespace google_breakpad + +#endif // COMMON_MAC_FILE_ID_H__ + diff --git a/3rdParty/Breakpad/src/common/mac/macho_id.cc b/3rdParty/Breakpad/src/common/mac/macho_id.cc new file mode 100644 index 0000000..abe1fab --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/macho_id.cc @@ -0,0 +1,366 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// macho_id.cc: Functions to gather identifying information from a macho file +// +// See macho_id.h for documentation +// +// Author: Dan Waylonis + +extern "C" { // necessary for Leopard + #include <fcntl.h> + #include <mach-o/loader.h> + #include <mach-o/swap.h> + #include <stdio.h> + #include <stdlib.h> + #include <string.h> + #include <sys/time.h> + #include <sys/types.h> + #include <unistd.h> +} + +#include "common/mac/macho_id.h" +#include "common/mac/macho_walker.h" +#include "common/mac/macho_utilities.h" + +namespace MacFileUtilities { + +using google_breakpad::MD5Init; +using google_breakpad::MD5Update; +using google_breakpad::MD5Final; + +MachoID::MachoID(const char *path) + : memory_(0), + memory_size_(0), + crc_(0), + md5_context_(), + update_function_(NULL) { + strlcpy(path_, path, sizeof(path_)); +} + +MachoID::MachoID(const char *path, void *memory, size_t size) + : memory_(memory), + memory_size_(size), + crc_(0), + md5_context_(), + update_function_(NULL) { + strlcpy(path_, path, sizeof(path_)); +} + +MachoID::~MachoID() { +} + +// The CRC info is from http://en.wikipedia.org/wiki/Adler-32 +// With optimizations from http://www.zlib.net/ + +// The largest prime smaller than 65536 +#define MOD_ADLER 65521 +// MAX_BLOCK is the largest n such that 255n(n+1)/2 + (n+1)(MAX_BLOCK-1) <= 2^32-1 +#define MAX_BLOCK 5552 + +void MachoID::UpdateCRC(unsigned char *bytes, size_t size) { +// Unrolled loops for summing +#define DO1(buf,i) {sum1 += (buf)[i]; sum2 += sum1;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + // Split up the crc + uint32_t sum1 = crc_ & 0xFFFF; + uint32_t sum2 = (crc_ >> 16) & 0xFFFF; + + // Do large blocks + while (size >= MAX_BLOCK) { + size -= MAX_BLOCK; + int block_count = MAX_BLOCK / 16; + do { + DO16(bytes); + bytes += 16; + } while (--block_count); + sum1 %= MOD_ADLER; + sum2 %= MOD_ADLER; + } + + // Do remaining bytes + if (size) { + while (size >= 16) { + size -= 16; + DO16(bytes); + bytes += 16; + } + while (size--) { + sum1 += *bytes++; + sum2 += sum1; + } + sum1 %= MOD_ADLER; + sum2 %= MOD_ADLER; + crc_ = (sum2 << 16) | sum1; + } +} + +void MachoID::UpdateMD5(unsigned char *bytes, size_t size) { + MD5Update(&md5_context_, bytes, size); +} + +void MachoID::Update(MachoWalker *walker, off_t offset, size_t size) { + if (!update_function_ || !size) + return; + + // Read up to 4k bytes at a time + unsigned char buffer[4096]; + size_t buffer_size; + off_t file_offset = offset; + while (size > 0) { + if (size > sizeof(buffer)) { + buffer_size = sizeof(buffer); + size -= buffer_size; + } else { + buffer_size = size; + size = 0; + } + + if (!walker->ReadBytes(buffer, buffer_size, file_offset)) + return; + + (this->*update_function_)(buffer, buffer_size); + file_offset += buffer_size; + } +} + +bool MachoID::UUIDCommand(int cpu_type, unsigned char bytes[16]) { + struct breakpad_uuid_command uuid_cmd; + uuid_cmd.cmd = 0; + if (!WalkHeader(cpu_type, UUIDWalkerCB, &uuid_cmd)) + return false; + + // If we found the command, we'll have initialized the uuid_command + // structure + if (uuid_cmd.cmd == LC_UUID) { + memcpy(bytes, uuid_cmd.uuid, sizeof(uuid_cmd.uuid)); + return true; + } + + return false; +} + +bool MachoID::IDCommand(int cpu_type, unsigned char identifier[16]) { + struct dylib_command dylib_cmd; + dylib_cmd.cmd = 0; + if (!WalkHeader(cpu_type, IDWalkerCB, &dylib_cmd)) + return false; + + // If we found the command, we'll have initialized the dylib_command + // structure + if (dylib_cmd.cmd == LC_ID_DYLIB) { + // Take the hashed filename, version, and compatability version bytes + // to form the first 12 bytes, pad the rest with zeros + + // create a crude hash of the filename to generate the first 4 bytes + identifier[0] = 0; + identifier[1] = 0; + identifier[2] = 0; + identifier[3] = 0; + + for (int j = 0, i = (int)strlen(path_)-1; i>=0 && path_[i]!='/'; ++j, --i) { + identifier[j%4] += path_[i]; + } + + identifier[4] = (dylib_cmd.dylib.current_version >> 24) & 0xFF; + identifier[5] = (dylib_cmd.dylib.current_version >> 16) & 0xFF; + identifier[6] = (dylib_cmd.dylib.current_version >> 8) & 0xFF; + identifier[7] = dylib_cmd.dylib.current_version & 0xFF; + identifier[8] = (dylib_cmd.dylib.compatibility_version >> 24) & 0xFF; + identifier[9] = (dylib_cmd.dylib.compatibility_version >> 16) & 0xFF; + identifier[10] = (dylib_cmd.dylib.compatibility_version >> 8) & 0xFF; + identifier[11] = dylib_cmd.dylib.compatibility_version & 0xFF; + identifier[12] = (cpu_type >> 24) & 0xFF; + identifier[13] = (cpu_type >> 16) & 0xFF; + identifier[14] = (cpu_type >> 8) & 0xFF; + identifier[15] = cpu_type & 0xFF; + + return true; + } + + return false; +} + +uint32_t MachoID::Adler32(int cpu_type) { + update_function_ = &MachoID::UpdateCRC; + crc_ = 0; + + if (!WalkHeader(cpu_type, WalkerCB, this)) + return 0; + + return crc_; +} + +bool MachoID::MD5(int cpu_type, unsigned char identifier[16]) { + update_function_ = &MachoID::UpdateMD5; + + MD5Init(&md5_context_); + + if (!WalkHeader(cpu_type, WalkerCB, this)) + return false; + + MD5Final(identifier, &md5_context_); + return true; +} + +bool MachoID::WalkHeader(int cpu_type, + MachoWalker::LoadCommandCallback callback, + void *context) { + if (memory_) { + MachoWalker walker(memory_, memory_size_, callback, context); + return walker.WalkHeader(cpu_type); + } else { + MachoWalker walker(path_, callback, context); + return walker.WalkHeader(cpu_type); + } +} + +// static +bool MachoID::WalkerCB(MachoWalker *walker, load_command *cmd, off_t offset, + bool swap, void *context) { + MachoID *macho_id = (MachoID *)context; + + if (cmd->cmd == LC_SEGMENT) { + struct segment_command seg; + + if (!walker->ReadBytes(&seg, sizeof(seg), offset)) + return false; + + if (swap) + swap_segment_command(&seg, NXHostByteOrder()); + + struct mach_header_64 header; + off_t header_offset; + + if (!walker->CurrentHeader(&header, &header_offset)) + return false; + + // Process segments that have sections: + // (e.g., __TEXT, __DATA, __IMPORT, __OBJC) + offset += sizeof(struct segment_command); + struct section sec; + for (unsigned long i = 0; i < seg.nsects; ++i) { + if (!walker->ReadBytes(&sec, sizeof(sec), offset)) + return false; + + if (swap) + swap_section(&sec, 1, NXHostByteOrder()); + + // sections of type S_ZEROFILL are "virtual" and contain no data + // in the file itself + if ((sec.flags & SECTION_TYPE) != S_ZEROFILL && sec.offset != 0) + macho_id->Update(walker, header_offset + sec.offset, sec.size); + + offset += sizeof(struct section); + } + } else if (cmd->cmd == LC_SEGMENT_64) { + struct segment_command_64 seg64; + + if (!walker->ReadBytes(&seg64, sizeof(seg64), offset)) + return false; + + if (swap) + breakpad_swap_segment_command_64(&seg64, NXHostByteOrder()); + + struct mach_header_64 header; + off_t header_offset; + + if (!walker->CurrentHeader(&header, &header_offset)) + return false; + + // Process segments that have sections: + // (e.g., __TEXT, __DATA, __IMPORT, __OBJC) + offset += sizeof(struct segment_command_64); + struct section_64 sec64; + for (unsigned long i = 0; i < seg64.nsects; ++i) { + if (!walker->ReadBytes(&sec64, sizeof(sec64), offset)) + return false; + + if (swap) + breakpad_swap_section_64(&sec64, 1, NXHostByteOrder()); + + // sections of type S_ZEROFILL are "virtual" and contain no data + // in the file itself + if ((sec64.flags & SECTION_TYPE) != S_ZEROFILL && sec64.offset != 0) + macho_id->Update(walker, + header_offset + sec64.offset, + (size_t)sec64.size); + + offset += sizeof(struct section_64); + } + } + + // Continue processing + return true; +} + +// static +bool MachoID::UUIDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset, + bool swap, void *context) { + if (cmd->cmd == LC_UUID) { + struct breakpad_uuid_command *uuid_cmd = + (struct breakpad_uuid_command *)context; + + if (!walker->ReadBytes(uuid_cmd, sizeof(struct breakpad_uuid_command), + offset)) + return false; + + if (swap) + breakpad_swap_uuid_command(uuid_cmd, NXHostByteOrder()); + + return false; + } + + // Continue processing + return true; +} + +// static +bool MachoID::IDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset, + bool swap, void *context) { + if (cmd->cmd == LC_ID_DYLIB) { + struct dylib_command *dylib_cmd = (struct dylib_command *)context; + + if (!walker->ReadBytes(dylib_cmd, sizeof(struct dylib_command), offset)) + return false; + + if (swap) + swap_dylib_command(dylib_cmd, NXHostByteOrder()); + + return false; + } + + // Continue processing + return true; +} + +} // namespace MacFileUtilities diff --git a/3rdParty/Breakpad/src/common/mac/macho_id.h b/3rdParty/Breakpad/src/common/mac/macho_id.h new file mode 100644 index 0000000..ccb126d --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/macho_id.h @@ -0,0 +1,120 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// macho_id.h: Functions to gather identifying information from a macho file +// +// Author: Dan Waylonis + +#ifndef COMMON_MAC_MACHO_ID_H__ +#define COMMON_MAC_MACHO_ID_H__ + +#include <limits.h> +#include <mach-o/loader.h> + +#include "common/mac/macho_walker.h" +#include "common/md5.h" + +namespace MacFileUtilities { + +class MachoID { + public: + MachoID(const char *path); + MachoID(const char *path, void *memory, size_t size); + ~MachoID(); + + // For the given |cpu_type|, return a UUID from the LC_UUID command. + // Return false if there isn't a LC_UUID command. + bool UUIDCommand(int cpu_type, unsigned char identifier[16]); + + // For the given |cpu_type|, return a UUID from the LC_ID_DYLIB command. + // Return false if there isn't a LC_ID_DYLIB command. + bool IDCommand(int cpu_type, unsigned char identifier[16]); + + // For the given |cpu_type|, return the Adler32 CRC for the mach-o data + // segment(s). + // Return 0 on error (e.g., if the file is not a mach-o file) + uint32_t Adler32(int cpu_type); + + // For the given |cpu_type|, return the MD5 for the mach-o data segment(s). + // Return true on success, false otherwise + bool MD5(int cpu_type, unsigned char identifier[16]); + + private: + // Signature of class member function to be called with data read from file + typedef void (MachoID::*UpdateFunction)(unsigned char *bytes, size_t size); + + // Update the CRC value by examining |size| |bytes| and applying the algorithm + // to each byte. + void UpdateCRC(unsigned char *bytes, size_t size); + + // Update the MD5 value by examining |size| |bytes| and applying the algorithm + // to each byte. + void UpdateMD5(unsigned char *bytes, size_t size); + + // Bottleneck for update routines + void Update(MachoWalker *walker, off_t offset, size_t size); + + // Factory for the MachoWalker + bool WalkHeader(int cpu_type, MachoWalker::LoadCommandCallback callback, + void *context); + + // The callback from the MachoWalker for CRC and MD5 + static bool WalkerCB(MachoWalker *walker, load_command *cmd, off_t offset, + bool swap, void *context); + + // The callback from the MachoWalker for LC_UUID + static bool UUIDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset, + bool swap, void *context); + + // The callback from the MachoWalker for LC_ID_DYLIB + static bool IDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset, + bool swap, void *context); + + // File path + char path_[PATH_MAX]; + + // Memory region to read from + void *memory_; + + // Size of the memory region + size_t memory_size_; + + // The current crc value + uint32_t crc_; + + // The MD5 context + google_breakpad::MD5Context md5_context_; + + // The current update to call from the Update callback + UpdateFunction update_function_; +}; + +} // namespace MacFileUtilities + +#endif // COMMON_MAC_MACHO_ID_H__ diff --git a/3rdParty/Breakpad/src/common/mac/macho_reader.cc b/3rdParty/Breakpad/src/common/mac/macho_reader.cc new file mode 100644 index 0000000..f1f0a17 --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/macho_reader.cc @@ -0,0 +1,530 @@ +// Copyright (c) 2010, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// macho_reader.cc: Implementation of google_breakpad::Mach_O::FatReader and +// google_breakpad::Mach_O::Reader. See macho_reader.h for details. + +#include "common/mac/macho_reader.h" + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> + +// Unfortunately, CPU_TYPE_ARM is not define for 10.4. +#if !defined(CPU_TYPE_ARM) +#define CPU_TYPE_ARM 12 +#endif + +namespace google_breakpad { +namespace mach_o { + +// If NDEBUG is #defined, then the 'assert' macro doesn't evaluate its +// arguments, so you can't place expressions that do necessary work in +// the argument of an assert. Nor can you assign the result of the +// expression to a variable and assert that the variable's value is +// true: you'll get unused variable warnings when NDEBUG is #defined. +// +// ASSERT_ALWAYS_EVAL always evaluates its argument, and asserts that +// the result is true if NDEBUG is not #defined. +#if defined(NDEBUG) +#define ASSERT_ALWAYS_EVAL(x) (x) +#else +#define ASSERT_ALWAYS_EVAL(x) assert(x) +#endif + +void FatReader::Reporter::BadHeader() { + fprintf(stderr, "%s: file is neither a fat binary file" + " nor a Mach-O object file\n", filename_.c_str()); +} + +void FatReader::Reporter::TooShort() { + fprintf(stderr, "%s: file too short for the data it claims to contain\n", + filename_.c_str()); +} + +void FatReader::Reporter::MisplacedObjectFile() { + fprintf(stderr, "%s: file too short for the object files it claims" + " to contain\n", filename_.c_str()); +} + +bool FatReader::Read(const uint8_t *buffer, size_t size) { + buffer_.start = buffer; + buffer_.end = buffer + size; + ByteCursor cursor(&buffer_); + + // Fat binaries always use big-endian, so read the magic number in + // that endianness. To recognize Mach-O magic numbers, which can use + // either endianness, check for both the proper and reversed forms + // of the magic numbers. + cursor.set_big_endian(true); + if (cursor >> magic_) { + if (magic_ == FAT_MAGIC) { + // How many object files does this fat binary contain? + uint32_t object_files_count; + if (!(cursor >> object_files_count)) { // nfat_arch + reporter_->TooShort(); + return false; + } + + // Read the list of object files. + object_files_.resize(object_files_count); + for (size_t i = 0; i < object_files_count; i++) { + struct fat_arch *objfile = &object_files_[i]; + + // Read this object file entry, byte-swapping as appropriate. + cursor >> objfile->cputype + >> objfile->cpusubtype + >> objfile->offset + >> objfile->size + >> objfile->align; + if (!cursor) { + reporter_->TooShort(); + return false; + } + // Does the file actually have the bytes this entry refers to? + size_t fat_size = buffer_.Size(); + if (objfile->offset > fat_size || + objfile->size > fat_size - objfile->offset) { + reporter_->MisplacedObjectFile(); + return false; + } + } + + return true; + } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 || + magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) { + // If this is a little-endian Mach-O file, fix the cursor's endianness. + if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) + cursor.set_big_endian(false); + // Record the entire file as a single entry in the object file list. + object_files_.resize(1); + + // Get the cpu type and subtype from the Mach-O header. + if (!(cursor >> object_files_[0].cputype + >> object_files_[0].cpusubtype)) { + reporter_->TooShort(); + return false; + } + + object_files_[0].offset = 0; + object_files_[0].size = static_cast<uint32_t>(buffer_.Size()); + // This alignment is correct for 32 and 64-bit x86 and ppc. + // See get_align in the lipo source for other architectures: + // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c + object_files_[0].align = 12; // 2^12 == 4096 + + return true; + } + } + + reporter_->BadHeader(); + return false; +} + +void Reader::Reporter::BadHeader() { + fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str()); +} + +void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type, + cpu_subtype_t cpu_subtype, + cpu_type_t expected_cpu_type, + cpu_subtype_t expected_cpu_subtype) { + fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected" + " type %d, subtype %d\n", + filename_.c_str(), cpu_type, cpu_subtype, + expected_cpu_type, expected_cpu_subtype); +} + +void Reader::Reporter::HeaderTruncated() { + fprintf(stderr, "%s: file does not contain a complete Mach-O header\n", + filename_.c_str()); +} + +void Reader::Reporter::LoadCommandRegionTruncated() { + fprintf(stderr, "%s: file too short to hold load command region" + " given in Mach-O header\n", filename_.c_str()); +} + +void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i, + LoadCommandType type) { + fprintf(stderr, "%s: file's header claims there are %ld" + " load commands, but load command #%ld", + filename_.c_str(), claimed, i); + if (type) fprintf(stderr, ", of type %d,", type); + fprintf(stderr, " extends beyond the end of the load command region\n"); +} + +void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) { + fprintf(stderr, "%s: the contents of load command #%ld, of type %d," + " extend beyond the size given in the load command's header\n", + filename_.c_str(), i, type); +} + +void Reader::Reporter::SectionsMissing(const string &name) { + fprintf(stderr, "%s: the load command for segment '%s'" + " is too short to hold the section headers it claims to have\n", + filename_.c_str(), name.c_str()); +} + +void Reader::Reporter::MisplacedSegmentData(const string &name) { + fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond" + " the end of the file\n", filename_.c_str(), name.c_str()); +} + +void Reader::Reporter::MisplacedSectionData(const string §ion, + const string &segment) { + fprintf(stderr, "%s: the section '%s' in segment '%s'" + " claims its contents lie outside the segment's contents\n", + filename_.c_str(), section.c_str(), segment.c_str()); +} + +void Reader::Reporter::MisplacedSymbolTable() { + fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol" + " table's contents are located beyond the end of the file\n", + filename_.c_str()); +} + +void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) { + fprintf(stderr, "%s: CPU type %d is not supported\n", + filename_.c_str(), cpu_type); +} + +bool Reader::Read(const uint8_t *buffer, + size_t size, + cpu_type_t expected_cpu_type, + cpu_subtype_t expected_cpu_subtype) { + assert(!buffer_.start); + buffer_.start = buffer; + buffer_.end = buffer + size; + ByteCursor cursor(&buffer_, true); + uint32_t magic; + if (!(cursor >> magic)) { + reporter_->HeaderTruncated(); + return false; + } + + if (expected_cpu_type != CPU_TYPE_ANY) { + uint32_t expected_magic; + // validate that magic matches the expected cpu type + switch (expected_cpu_type) { + case CPU_TYPE_ARM: + case CPU_TYPE_I386: + expected_magic = MH_CIGAM; + break; + case CPU_TYPE_POWERPC: + expected_magic = MH_MAGIC; + break; + case CPU_TYPE_X86_64: + expected_magic = MH_CIGAM_64; + break; + case CPU_TYPE_POWERPC64: + expected_magic = MH_MAGIC_64; + break; + default: + reporter_->UnsupportedCPUType(expected_cpu_type); + return false; + } + + if (expected_magic != magic) { + reporter_->BadHeader(); + return false; + } + } + + // Since the byte cursor is in big-endian mode, a reversed magic number + // always indicates a little-endian file, regardless of our own endianness. + switch (magic) { + case MH_MAGIC: big_endian_ = true; bits_64_ = false; break; + case MH_CIGAM: big_endian_ = false; bits_64_ = false; break; + case MH_MAGIC_64: big_endian_ = true; bits_64_ = true; break; + case MH_CIGAM_64: big_endian_ = false; bits_64_ = true; break; + default: + reporter_->BadHeader(); + return false; + } + cursor.set_big_endian(big_endian_); + uint32_t commands_size, reserved; + cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_ + >> commands_size >> flags_; + if (bits_64_) + cursor >> reserved; + if (!cursor) { + reporter_->HeaderTruncated(); + return false; + } + + if (expected_cpu_type != CPU_TYPE_ANY && + (expected_cpu_type != cpu_type_ || + expected_cpu_subtype != cpu_subtype_)) { + reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_, + expected_cpu_type, expected_cpu_subtype); + return false; + } + + cursor + .PointTo(&load_commands_.start, commands_size) + .PointTo(&load_commands_.end, 0); + if (!cursor) { + reporter_->LoadCommandRegionTruncated(); + return false; + } + + return true; +} + +bool Reader::WalkLoadCommands(Reader::LoadCommandHandler *handler) const { + ByteCursor list_cursor(&load_commands_, big_endian_); + + for (size_t index = 0; index < load_command_count_; ++index) { + // command refers to this load command alone, so that cursor will + // refuse to read past the load command's end. But since we haven't + // read the size yet, let command initially refer to the entire + // remainder of the load command series. + ByteBuffer command(list_cursor.here(), list_cursor.Available()); + ByteCursor cursor(&command, big_endian_); + + // Read the command type and size --- fields common to all commands. + uint32_t type, size; + if (!(cursor >> type)) { + reporter_->LoadCommandsOverrun(load_command_count_, index, 0); + return false; + } + if (!(cursor >> size) || size > command.Size()) { + reporter_->LoadCommandsOverrun(load_command_count_, index, type); + return false; + } + + // Now that we've read the length, restrict command's range to this + // load command only. + command.end = command.start + size; + + switch (type) { + case LC_SEGMENT: + case LC_SEGMENT_64: { + Segment segment; + segment.bits_64 = (type == LC_SEGMENT_64); + size_t word_size = segment.bits_64 ? 8 : 4; + cursor.CString(&segment.name, 16); + size_t file_offset, file_size; + cursor + .Read(word_size, false, &segment.vmaddr) + .Read(word_size, false, &segment.vmsize) + .Read(word_size, false, &file_offset) + .Read(word_size, false, &file_size); + cursor >> segment.maxprot + >> segment.initprot + >> segment.nsects + >> segment.flags; + if (!cursor) { + reporter_->LoadCommandTooShort(index, type); + return false; + } + if (file_offset > buffer_.Size() || + file_size > buffer_.Size() - file_offset) { + reporter_->MisplacedSegmentData(segment.name); + return false; + } + // Mach-O files in .dSYM bundles have the contents of the loaded + // segments removed, and their file offsets and file sizes zeroed + // out. To help us handle this special case properly, give such + // segments' contents NULL starting and ending pointers. + if (file_offset == 0 && file_size == 0) { + segment.contents.start = segment.contents.end = NULL; + } else { + segment.contents.start = buffer_.start + file_offset; + segment.contents.end = segment.contents.start + file_size; + } + // The section list occupies the remainder of this load command's space. + segment.section_list.start = cursor.here(); + segment.section_list.end = command.end; + + if (!handler->SegmentCommand(segment)) + return false; + break; + } + + case LC_SYMTAB: { + uint32_t symoff, nsyms, stroff, strsize; + cursor >> symoff >> nsyms >> stroff >> strsize; + if (!cursor) { + reporter_->LoadCommandTooShort(index, type); + return false; + } + // How big are the entries in the symbol table? + // sizeof(struct nlist_64) : sizeof(struct nlist), + // but be paranoid about alignment vs. target architecture. + size_t symbol_size = bits_64_ ? 16 : 12; + // How big is the entire symbol array? + size_t symbols_size = nsyms * symbol_size; + if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff || + stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) { + reporter_->MisplacedSymbolTable(); + return false; + } + ByteBuffer entries(buffer_.start + symoff, symbols_size); + ByteBuffer names(buffer_.start + stroff, strsize); + if (!handler->SymtabCommand(entries, names)) + return false; + break; + } + + default: { + if (!handler->UnknownCommand(type, command)) + return false; + break; + } + } + + list_cursor.set_here(command.end); + } + + return true; +} + +// A load command handler that looks for a segment of a given name. +class Reader::SegmentFinder : public LoadCommandHandler { + public: + // Create a load command handler that looks for a segment named NAME, + // and sets SEGMENT to describe it if found. + SegmentFinder(const string &name, Segment *segment) + : name_(name), segment_(segment), found_() { } + + // Return true if the traversal found the segment, false otherwise. + bool found() const { return found_; } + + bool SegmentCommand(const Segment &segment) { + if (segment.name == name_) { + *segment_ = segment; + found_ = true; + return false; + } + return true; + } + + private: + // The name of the segment our creator is looking for. + const string &name_; + + // Where we should store the segment if found. (WEAK) + Segment *segment_; + + // True if we found the segment. + bool found_; +}; + +bool Reader::FindSegment(const string &name, Segment *segment) const { + SegmentFinder finder(name, segment); + WalkLoadCommands(&finder); + return finder.found(); +} + +bool Reader::WalkSegmentSections(const Segment &segment, + SectionHandler *handler) const { + size_t word_size = segment.bits_64 ? 8 : 4; + ByteCursor cursor(&segment.section_list, big_endian_); + + for (size_t i = 0; i < segment.nsects; i++) { + Section section; + section.bits_64 = segment.bits_64; + uint64_t size; + uint32_t offset, dummy32; + cursor + .CString(§ion.section_name, 16) + .CString(§ion.segment_name, 16) + .Read(word_size, false, §ion.address) + .Read(word_size, false, &size) + >> offset + >> section.align + >> dummy32 + >> dummy32 + >> section.flags + >> dummy32 + >> dummy32; + if (section.bits_64) + cursor >> dummy32; + if (!cursor) { + reporter_->SectionsMissing(segment.name); + return false; + } + if ((section.flags & SECTION_TYPE) == S_ZEROFILL) { + // Zero-fill sections have a size, but no contents. + section.contents.start = section.contents.end = NULL; + } else if (segment.contents.start == NULL && + segment.contents.end == NULL) { + // Mach-O files in .dSYM bundles have the contents of the loaded + // segments removed, and their file offsets and file sizes zeroed + // out. However, the sections within those segments still have + // non-zero sizes. There's no reason to call MisplacedSectionData in + // this case; the caller may just need the section's load + // address. But do set the contents' limits to NULL, for safety. + section.contents.start = section.contents.end = NULL; + } else { + if (offset < size_t(segment.contents.start - buffer_.start) || + offset > size_t(segment.contents.end - buffer_.start) || + size > size_t(segment.contents.end - buffer_.start - offset)) { + reporter_->MisplacedSectionData(section.section_name, + section.segment_name); + return false; + } + section.contents.start = buffer_.start + offset; + section.contents.end = section.contents.start + size; + } + if (!handler->HandleSection(section)) + return false; + } + return true; +} + +// A SectionHandler that builds a SectionMap for the sections within a +// given segment. +class Reader::SectionMapper: public SectionHandler { + public: + // Create a SectionHandler that populates MAP with an entry for + // each section it is given. + SectionMapper(SectionMap *map) : map_(map) { } + bool HandleSection(const Section §ion) { + (*map_)[section.section_name] = section; + return true; + } + private: + // The map under construction. (WEAK) + SectionMap *map_; +}; + +bool Reader::MapSegmentSections(const Segment &segment, + SectionMap *section_map) const { + section_map->clear(); + SectionMapper mapper(section_map); + return WalkSegmentSections(segment, &mapper); +} + +} // namespace mach_o +} // namespace google_breakpad diff --git a/3rdParty/Breakpad/src/common/mac/macho_reader.h b/3rdParty/Breakpad/src/common/mac/macho_reader.h new file mode 100644 index 0000000..7537648 --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/macho_reader.h @@ -0,0 +1,459 @@ +// -*- mode: C++ -*- + +// Copyright (c) 2010, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// macho_reader.h: A class for parsing Mach-O files. + +#ifndef BREAKPAD_COMMON_MAC_MACHO_READER_H_ +#define BREAKPAD_COMMON_MAC_MACHO_READER_H_ + +#include <mach-o/loader.h> +#include <mach-o/fat.h> +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> + +#include <map> +#include <string> +#include <vector> + +#include "common/byte_cursor.h" + +namespace google_breakpad { +namespace mach_o { + +using std::map; +using std::string; +using std::vector; + +// The Mac headers don't specify particular types for these groups of +// constants, but defining them here provides some documentation +// value. We also give them the same width as the fields in which +// they appear, which makes them a bit easier to use with ByteCursors. +typedef uint32_t Magic; +typedef uint32_t FileType; +typedef uint32_t FileFlags; +typedef uint32_t LoadCommandType; +typedef uint32_t SegmentFlags; +typedef uint32_t SectionFlags; + +// A parser for fat binary files, used to store universal binaries. +// When applied to a (non-fat) Mach-O file, this behaves as if the +// file were a fat file containing a single object file. +class FatReader { + public: + + // A class for reporting errors found while parsing fat binary files. The + // default definitions of these methods print messages to stderr. + class Reporter { + public: + // Create a reporter that attributes problems to |filename|. + explicit Reporter(const string &filename) : filename_(filename) { } + + virtual ~Reporter() { } + + // The data does not begin with a fat binary or Mach-O magic number. + // This is a fatal error. + virtual void BadHeader(); + + // The Mach-O fat binary file ends abruptly, without enough space + // to contain an object file it claims is present. + virtual void MisplacedObjectFile(); + + // The file ends abruptly: either it is not large enough to hold a + // complete header, or the header implies that contents are present + // beyond the actual end of the file. + virtual void TooShort(); + + private: + // The filename to which the reader should attribute problems. + string filename_; + }; + + // Create a fat binary file reader that uses |reporter| to report problems. + explicit FatReader(Reporter *reporter) : reporter_(reporter) { } + + // Read the |size| bytes at |buffer| as a fat binary file. On success, + // return true; on failure, report the problem to reporter_ and return + // false. + // + // If the data is a plain Mach-O file, rather than a fat binary file, + // then the reader behaves as if it had found a fat binary file whose + // single object file is the Mach-O file. + bool Read(const uint8_t *buffer, size_t size); + + // Return an array of 'struct fat_arch' structures describing the + // object files present in this fat binary file. Set |size| to the + // number of elements in the array. + // + // Assuming Read returned true, the entries are validated: it is + // safe to assume that the offsets and sizes in each 'struct + // fat_arch' refer to subranges of the bytes passed to Read. + // + // If there are no object files in this fat binary, then this + // function can return NULL. + // + // The array is owned by this FatReader instance; it will be freed when + // this FatReader is destroyed. + // + // This function returns a C-style array instead of a vector to make it + // possible to use the result with OS X functions like NXFindBestFatArch, + // so that the symbol dumper will behave consistently with other OS X + // utilities that work with fat binaries. + const struct fat_arch *object_files(size_t *count) const { + *count = object_files_.size(); + if (object_files_.size() > 0) + return &object_files_[0]; + return NULL; + } + + private: + // We use this to report problems parsing the file's contents. (WEAK) + Reporter *reporter_; + + // The contents of the fat binary or Mach-O file we're parsing. We do not + // own the storage it refers to. + ByteBuffer buffer_; + + // The magic number of this binary, in host byte order. + Magic magic_; + + // The list of object files in this binary. + // object_files_.size() == fat_header.nfat_arch + vector<struct fat_arch> object_files_; +}; + +// A segment in a Mach-O file. All these fields have been byte-swapped as +// appropriate for use by the executing architecture. +struct Segment { + // The ByteBuffers below point into the bytes passed to the Reader that + // created this Segment. + + ByteBuffer section_list; // This segment's section list. + ByteBuffer contents; // This segment's contents. + + // This segment's name. + string name; + + // The address at which this segment should be loaded in memory. If + // bits_64 is false, only the bottom 32 bits of this value are valid. + uint64_t vmaddr; + + // The size of this segment when loaded into memory. This may be larger + // than contents.Size(), in which case the extra area will be + // initialized with zeros. If bits_64 is false, only the bottom 32 bits + // of this value are valid. + uint64_t vmsize; + + // The maximum and initial VM protection of this segment's contents. + uint32_t maxprot; + uint32_t initprot; + + // The number of sections in section_list. + uint32_t nsects; + + // Flags describing this segment, from SegmentFlags. + uint32_t flags; + + // True if this is a 64-bit section; false if it is a 32-bit section. + bool bits_64; +}; + +// A section in a Mach-O file. All these fields have been byte-swapped as +// appropriate for use by the executing architecture. +struct Section { + // This section's contents. This points into the bytes passed to the + // Reader that created this Section. + ByteBuffer contents; + + // This section's name. + string section_name; // section[_64].sectname + // The name of the segment this section belongs to. + string segment_name; // section[_64].segname + + // The address at which this section's contents should be loaded in + // memory. If bits_64 is false, only the bottom 32 bits of this value + // are valid. + uint64_t address; + + // The contents of this section should be loaded into memory at an + // address which is a multiple of (two raised to this power). + uint32_t align; + + // Flags from SectionFlags describing the section's contents. + uint32_t flags; + + // We don't support reading relocations yet. + + // True if this is a 64-bit section; false if it is a 32-bit section. + bool bits_64; +}; + +// A map from section names to Sections. +typedef map<string, Section> SectionMap; + +// A reader for a Mach-O file. +// +// This does not handle fat binaries; see FatReader above. FatReader +// provides a friendly interface for parsing data that could be either a +// fat binary or a Mach-O file. +class Reader { + public: + + // A class for reporting errors found while parsing Mach-O files. The + // default definitions of these member functions print messages to + // stderr. + class Reporter { + public: + // Create a reporter that attributes problems to |filename|. + explicit Reporter(const string &filename) : filename_(filename) { } + virtual ~Reporter() { } + + // Reporter functions for fatal errors return void; the reader will + // definitely return an error to its caller after calling them + + // The data does not begin with a Mach-O magic number, or the magic + // number does not match the expected value for the cpu architecture. + // This is a fatal error. + virtual void BadHeader(); + + // The data contained in a Mach-O fat binary (|cpu_type|, |cpu_subtype|) + // does not match the expected CPU architecture + // (|expected_cpu_type|, |expected_cpu_subtype|). + virtual void CPUTypeMismatch(cpu_type_t cpu_type, + cpu_subtype_t cpu_subtype, + cpu_type_t expected_cpu_type, + cpu_subtype_t expected_cpu_subtype); + + // The file ends abruptly: either it is not large enough to hold a + // complete header, or the header implies that contents are present + // beyond the actual end of the file. + virtual void HeaderTruncated(); + + // The file's load command region, as given in the Mach-O header, is + // too large for the file. + virtual void LoadCommandRegionTruncated(); + + // The file's Mach-O header claims the file contains |claimed| load + // commands, but the I'th load command, of type |type|, extends beyond + // the end of the load command region, as given by the Mach-O header. + // If |type| is zero, the command's type was unreadable. + virtual void LoadCommandsOverrun(size_t claimed, size_t i, + LoadCommandType type); + + // The contents of the |i|'th load command, of type |type|, extend beyond + // the size given in the load command's header. + virtual void LoadCommandTooShort(size_t i, LoadCommandType type); + + // The LC_SEGMENT or LC_SEGMENT_64 load command for the segment named + // |name| is too short to hold the sections that its header says it does. + // (This more specific than LoadCommandTooShort.) + virtual void SectionsMissing(const string &name); + + // The segment named |name| claims that its contents lie beyond the end + // of the file. + virtual void MisplacedSegmentData(const string &name); + + // The section named |section| in the segment named |segment| claims that + // its contents do not lie entirely within the segment. + virtual void MisplacedSectionData(const string §ion, + const string &segment); + + // The LC_SYMTAB command claims that symbol table contents are located + // beyond the end of the file. + virtual void MisplacedSymbolTable(); + + // An attempt was made to read a Mach-O file of the unsupported + // CPU architecture |cpu_type|. + virtual void UnsupportedCPUType(cpu_type_t cpu_type); + + private: + string filename_; + }; + + // A handler for sections parsed from a segment. The WalkSegmentSections + // member function accepts an instance of this class, and applies it to + // each section defined in a given segment. + class SectionHandler { + public: + virtual ~SectionHandler() { } + + // Called to report that the segment's section list contains |section|. + // This should return true if the iteration should continue, or false + // if it should stop. + virtual bool HandleSection(const Section §ion) = 0; + }; + + // A handler for the load commands in a Mach-O file. + class LoadCommandHandler { + public: + LoadCommandHandler() { } + virtual ~LoadCommandHandler() { } + + // When called from WalkLoadCommands, the following handler functions + // should return true if they wish to continue iterating over the load + // command list, or false if they wish to stop iterating. + // + // When called from LoadCommandIterator::Handle or Reader::Handle, + // these functions' return values are simply passed through to Handle's + // caller. + // + // The definitions provided by this base class simply return true; the + // default is to silently ignore sections whose member functions the + // subclass doesn't override. + + // COMMAND is load command we don't recognize. We provide only the + // command type and a ByteBuffer enclosing the command's data (If we + // cannot parse the command type or its size, we call + // reporter_->IncompleteLoadCommand instead.) + virtual bool UnknownCommand(LoadCommandType type, + const ByteBuffer &contents) { + return true; + } + + // The load command is LC_SEGMENT or LC_SEGMENT_64, defining a segment + // with the properties given in |segment|. + virtual bool SegmentCommand(const Segment &segment) { + return true; + } + + // The load command is LC_SYMTAB. |entries| holds the array of nlist + // entries, and |names| holds the strings the entries refer to. + virtual bool SymtabCommand(const ByteBuffer &entries, + const ByteBuffer &names) { + return true; + } + + // Add handler functions for more load commands here as needed. + }; + + // Create a Mach-O file reader that reports problems to |reporter|. + explicit Reader(Reporter *reporter) + : reporter_(reporter) { } + + // Read the given data as a Mach-O file. The reader retains pointers + // into the data passed, so the data should live as long as the reader + // does. On success, return true; on failure, return false. + // + // At most one of these functions should be invoked once on each Reader + // instance. + bool Read(const uint8_t *buffer, + size_t size, + cpu_type_t expected_cpu_type, + cpu_subtype_t expected_cpu_subtype); + bool Read(const ByteBuffer &buffer, + cpu_type_t expected_cpu_type, + cpu_subtype_t expected_cpu_subtype) { + return Read(buffer.start, + buffer.Size(), + expected_cpu_type, + expected_cpu_subtype); + } + + // Return this file's characteristics, as found in the Mach-O header. + cpu_type_t cpu_type() const { return cpu_type_; } + cpu_subtype_t cpu_subtype() const { return cpu_subtype_; } + FileType file_type() const { return file_type_; } + FileFlags flags() const { return flags_; } + + // Return true if this is a 64-bit Mach-O file, false if it is a 32-bit + // Mach-O file. + bool bits_64() const { return bits_64_; } + + // Return true if this is a big-endian Mach-O file, false if it is + // little-endian. + bool big_endian() const { return big_endian_; } + + // Apply |handler| to each load command in this Mach-O file, stopping when + // a handler function returns false. If we encounter a malformed load + // command, report it via reporter_ and return false. Return true if all + // load commands were parseable and all handlers returned true. + bool WalkLoadCommands(LoadCommandHandler *handler) const; + + // Set |segment| to describe the segment named |name|, if present. If + // found, |segment|'s byte buffers refer to a subregion of the bytes + // passed to Read. If we find the section, return true; otherwise, + // return false. + bool FindSegment(const string &name, Segment *segment) const; + + // Apply |handler| to each section defined in |segment|. If |handler| returns + // false, stop iterating and return false. If all calls to |handler| return + // true and we reach the end of the section list, return true. + bool WalkSegmentSections(const Segment &segment, SectionHandler *handler) + const; + + // Clear |section_map| and then populate it with a map of the sections + // in |segment|, from section names to Section structures. + // Each Section's contents refer to bytes in |segment|'s contents. + // On success, return true; if a problem occurs, report it and return false. + bool MapSegmentSections(const Segment &segment, SectionMap *section_map) + const; + + private: + // Used internally. + class SegmentFinder; + class SectionMapper; + + // We use this to report problems parsing the file's contents. (WEAK) + Reporter *reporter_; + + // The contents of the Mach-O file we're parsing. We do not own the + // storage it refers to. + ByteBuffer buffer_; + + // True if this file is big-endian. + bool big_endian_; + + // True if this file is a 64-bit Mach-O file. + bool bits_64_; + + // This file's cpu type and subtype. + cpu_type_t cpu_type_; // mach_header[_64].cputype + cpu_subtype_t cpu_subtype_; // mach_header[_64].cpusubtype + + // This file's type. + FileType file_type_; // mach_header[_64].filetype + + // The region of buffer_ occupied by load commands. + ByteBuffer load_commands_; + + // The number of load commands in load_commands_. + uint32_t load_command_count_; // mach_header[_64].ncmds + + // This file's header flags. + FileFlags flags_; +}; + +} // namespace mach_o +} // namespace google_breakpad + +#endif // BREAKPAD_COMMON_MAC_MACHO_READER_H_ diff --git a/3rdParty/Breakpad/src/common/mac/macho_utilities.cc b/3rdParty/Breakpad/src/common/mac/macho_utilities.cc new file mode 100644 index 0000000..89f9e77 --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/macho_utilities.cc @@ -0,0 +1,90 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// macho_utilties.cc: Utilities for dealing with mach-o files +// +// Author: Dave Camp + +#include "common/mac/byteswap.h" +#include "common/mac/macho_utilities.h" + +void breakpad_swap_uuid_command(struct breakpad_uuid_command *uc, + enum NXByteOrder target_byte_order) +{ + uc->cmd = ByteSwap(uc->cmd); + uc->cmdsize = ByteSwap(uc->cmdsize); +} + +void breakpad_swap_segment_command_64(struct segment_command_64 *sg, + enum NXByteOrder target_byte_order) +{ + sg->cmd = ByteSwap(sg->cmd); + sg->cmdsize = ByteSwap(sg->cmdsize); + + sg->vmaddr = ByteSwap(sg->vmaddr); + sg->vmsize = ByteSwap(sg->vmsize); + sg->fileoff = ByteSwap(sg->fileoff); + sg->filesize = ByteSwap(sg->filesize); + + sg->maxprot = ByteSwap(sg->maxprot); + sg->initprot = ByteSwap(sg->initprot); + sg->nsects = ByteSwap(sg->nsects); + sg->flags = ByteSwap(sg->flags); +} + +void breakpad_swap_mach_header_64(struct mach_header_64 *mh, + enum NXByteOrder target_byte_order) +{ + mh->magic = ByteSwap(mh->magic); + mh->cputype = ByteSwap(mh->cputype); + mh->cpusubtype = ByteSwap(mh->cpusubtype); + mh->filetype = ByteSwap(mh->filetype); + mh->ncmds = ByteSwap(mh->ncmds); + mh->sizeofcmds = ByteSwap(mh->sizeofcmds); + mh->flags = ByteSwap(mh->flags); + mh->reserved = ByteSwap(mh->reserved); +} + +void breakpad_swap_section_64(struct section_64 *s, + uint32_t nsects, + enum NXByteOrder target_byte_order) +{ + for (uint32_t i = 0; i < nsects; i++) { + s[i].addr = ByteSwap(s[i].addr); + s[i].size = ByteSwap(s[i].size); + + s[i].offset = ByteSwap(s[i].offset); + s[i].align = ByteSwap(s[i].align); + s[i].reloff = ByteSwap(s[i].reloff); + s[i].nreloc = ByteSwap(s[i].nreloc); + s[i].flags = ByteSwap(s[i].flags); + s[i].reserved1 = ByteSwap(s[i].reserved1); + s[i].reserved2 = ByteSwap(s[i].reserved2); + } +} diff --git a/3rdParty/Breakpad/src/common/mac/macho_utilities.h b/3rdParty/Breakpad/src/common/mac/macho_utilities.h new file mode 100644 index 0000000..a07945f --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/macho_utilities.h @@ -0,0 +1,92 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// macho_utilities.h: Utilities for dealing with mach-o files +// +// Author: Dave Camp + +#ifndef COMMON_MAC_MACHO_UTILITIES_H__ +#define COMMON_MAC_MACHO_UTILITIES_H__ + +#include <mach-o/loader.h> +#include <mach/thread_status.h> + +/* Some #defines and structs that aren't defined in older SDKs */ +#ifndef CPU_ARCH_ABI64 +# define CPU_ARCH_ABI64 0x01000000 +#endif + +#ifndef CPU_TYPE_X86 +# define CPU_TYPE_X86 CPU_TYPE_I386 +#endif + +#ifndef CPU_TYPE_POWERPC64 +# define CPU_TYPE_POWERPC64 (CPU_TYPE_POWERPC | CPU_ARCH_ABI64) +#endif + +#ifndef LC_UUID +# define LC_UUID 0x1b /* the uuid */ +#endif + +#if TARGET_CPU_X86 +# define BREAKPAD_MACHINE_THREAD_STATE i386_THREAD_STATE +#elif TARGET_CPU_X86_64 +# define BREAKPAD_MACHINE_THREAD_STATE x86_THREAD_STATE64 +#else +# define BREAKPAD_MACHINE_THREAD_STATE MACHINE_THREAD_STATE +#endif + +// The uuid_command struct/swap routines were added during the 10.4 series. +// Their presence isn't guaranteed. +struct breakpad_uuid_command { + uint32_t cmd; /* LC_UUID */ + uint32_t cmdsize; /* sizeof(struct uuid_command) */ + uint8_t uuid[16]; /* the 128-bit uuid */ +}; + +void breakpad_swap_uuid_command(struct breakpad_uuid_command *uc, + enum NXByteOrder target_byte_order); + +// Older SDKs defines thread_state_data_t as an int[] instead +// of the natural_t[] it should be. +typedef natural_t breakpad_thread_state_data_t[THREAD_STATE_MAX]; + +// The 64-bit swap routines were added during the 10.4 series, their +// presence isn't guaranteed. +void breakpad_swap_segment_command_64(struct segment_command_64 *sg, + enum NXByteOrder target_byte_order); + +void breakpad_swap_mach_header_64(struct mach_header_64 *mh, + enum NXByteOrder target_byte_order); + +void breakpad_swap_section_64(struct section_64 *s, + uint32_t nsects, + enum NXByteOrder target_byte_order); + +#endif diff --git a/3rdParty/Breakpad/src/common/mac/macho_walker.cc b/3rdParty/Breakpad/src/common/mac/macho_walker.cc new file mode 100644 index 0000000..92da7b1 --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/macho_walker.cc @@ -0,0 +1,267 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// macho_walker.cc: Iterate over the load commands in a mach-o file +// +// See macho_walker.h for documentation +// +// Author: Dan Waylonis + +extern "C" { // necessary for Leopard + #include <assert.h> + #include <fcntl.h> + #include <mach-o/arch.h> + #include <mach-o/loader.h> + #include <mach-o/swap.h> + #include <string.h> + #include <unistd.h> +} + +#include "common/mac/byteswap.h" +#include "common/mac/macho_walker.h" +#include "common/mac/macho_utilities.h" + +namespace MacFileUtilities { + +MachoWalker::MachoWalker(const char *path, LoadCommandCallback callback, + void *context) + : file_(0), + memory_(NULL), + memory_size_(0), + callback_(callback), + callback_context_(context), + current_header_(NULL), + current_header_size_(0), + current_header_offset_(0) { + file_ = open(path, O_RDONLY); +} + +MachoWalker::MachoWalker(void *memory, size_t size, + LoadCommandCallback callback, void *context) + : file_(0), + memory_(memory), + memory_size_(size), + callback_(callback), + callback_context_(context), + current_header_(NULL), + current_header_size_(0), + current_header_offset_(0) { +} + +MachoWalker::~MachoWalker() { + if (file_ != -1) + close(file_); +} + +int MachoWalker::ValidateCPUType(int cpu_type) { + // If the user didn't specify, use the local architecture. + if (cpu_type == 0) { + const NXArchInfo *arch = NXGetLocalArchInfo(); + assert(arch); + cpu_type = arch->cputype; + } + + return cpu_type; +} + +bool MachoWalker::WalkHeader(int cpu_type) { + int valid_cpu_type = ValidateCPUType(cpu_type); + off_t offset; + if (FindHeader(valid_cpu_type, offset)) { + if (cpu_type & CPU_ARCH_ABI64) + return WalkHeader64AtOffset(offset); + + return WalkHeaderAtOffset(offset); + } + + return false; +} + +bool MachoWalker::ReadBytes(void *buffer, size_t size, off_t offset) { + if (memory_) { + if (offset < 0) + return false; + bool result = true; + if (offset + size > memory_size_) { + if (static_cast<size_t>(offset) >= memory_size_) + return false; + size = memory_size_ - offset; + result = false; + } + memcpy(buffer, static_cast<char *>(memory_) + offset, size); + return result; + } else { + return pread(file_, buffer, size, offset) == (ssize_t)size; + } +} + +bool MachoWalker::CurrentHeader(struct mach_header_64 *header, off_t *offset) { + if (current_header_) { + memcpy(header, current_header_, sizeof(mach_header_64)); + *offset = current_header_offset_; + return true; + } + + return false; +} + +bool MachoWalker::FindHeader(int cpu_type, off_t &offset) { + int valid_cpu_type = ValidateCPUType(cpu_type); + // Read the magic bytes that's common amongst all mach-o files + uint32_t magic; + if (!ReadBytes(&magic, sizeof(magic), 0)) + return false; + + offset = sizeof(magic); + + // Figure out what type of file we've got + bool is_fat = false; + if (magic == FAT_MAGIC || magic == FAT_CIGAM) { + is_fat = true; + } + else if (magic != MH_MAGIC && magic != MH_CIGAM && magic != MH_MAGIC_64 && + magic != MH_CIGAM_64) { + return false; + } + + if (!is_fat) { + // If we don't have a fat header, check if the cpu type matches the single + // header + cpu_type_t header_cpu_type; + if (!ReadBytes(&header_cpu_type, sizeof(header_cpu_type), offset)) + return false; + + if (magic == MH_CIGAM || magic == MH_CIGAM_64) + header_cpu_type = ByteSwap(header_cpu_type); + + if (valid_cpu_type != header_cpu_type) + return false; + + offset = 0; + return true; + } else { + // Read the fat header and find an appropriate architecture + offset = 0; + struct fat_header fat; + if (!ReadBytes(&fat, sizeof(fat), offset)) + return false; + + if (NXHostByteOrder() != NX_BigEndian) + swap_fat_header(&fat, NXHostByteOrder()); + + offset += sizeof(fat); + + // Search each architecture for the desired one + struct fat_arch arch; + for (uint32_t i = 0; i < fat.nfat_arch; ++i) { + if (!ReadBytes(&arch, sizeof(arch), offset)) + return false; + + if (NXHostByteOrder() != NX_BigEndian) + swap_fat_arch(&arch, 1, NXHostByteOrder()); + + if (arch.cputype == valid_cpu_type) { + offset = arch.offset; + return true; + } + + offset += sizeof(arch); + } + } + + return false; +} + +bool MachoWalker::WalkHeaderAtOffset(off_t offset) { + struct mach_header header; + if (!ReadBytes(&header, sizeof(header), offset)) + return false; + + bool swap = (header.magic == MH_CIGAM); + if (swap) + swap_mach_header(&header, NXHostByteOrder()); + + // Copy the data into the mach_header_64 structure. Since the 32-bit and + // 64-bit only differ in the last field (reserved), this is safe to do. + struct mach_header_64 header64; + memcpy((void *)&header64, (const void *)&header, sizeof(header)); + header64.reserved = 0; + + current_header_ = &header64; + current_header_size_ = sizeof(header); // 32-bit, not 64-bit + current_header_offset_ = offset; + offset += current_header_size_; + bool result = WalkHeaderCore(offset, header.ncmds, swap); + current_header_ = NULL; + current_header_size_ = 0; + current_header_offset_ = 0; + return result; +} + +bool MachoWalker::WalkHeader64AtOffset(off_t offset) { + struct mach_header_64 header; + if (!ReadBytes(&header, sizeof(header), offset)) + return false; + + bool swap = (header.magic == MH_CIGAM_64); + if (swap) + breakpad_swap_mach_header_64(&header, NXHostByteOrder()); + + current_header_ = &header; + current_header_size_ = sizeof(header); + current_header_offset_ = offset; + offset += current_header_size_; + bool result = WalkHeaderCore(offset, header.ncmds, swap); + current_header_ = NULL; + current_header_size_ = 0; + current_header_offset_ = 0; + return result; +} + +bool MachoWalker::WalkHeaderCore(off_t offset, uint32_t number_of_commands, + bool swap) { + for (uint32_t i = 0; i < number_of_commands; ++i) { + struct load_command cmd; + if (!ReadBytes(&cmd, sizeof(cmd), offset)) + return false; + + if (swap) + swap_load_command(&cmd, NXHostByteOrder()); + + // Call the user callback + if (callback_ && !callback_(this, &cmd, offset, swap, callback_context_)) + break; + + offset += cmd.cmdsize; + } + + return true; +} + +} // namespace MacFileUtilities diff --git a/3rdParty/Breakpad/src/common/mac/macho_walker.h b/3rdParty/Breakpad/src/common/mac/macho_walker.h new file mode 100644 index 0000000..cee3eb8 --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/macho_walker.h @@ -0,0 +1,117 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// macho_walker.h: Iterate over the load commands in a mach-o file +// +// Author: Dan Waylonis + +#ifndef COMMON_MAC_MACHO_WALKER_H__ +#define COMMON_MAC_MACHO_WALKER_H__ + +#include <mach-o/loader.h> +#include <sys/types.h> + +namespace MacFileUtilities { + +class MachoWalker { + public: + // A callback function executed when a new load command is read. If no + // further processing of load commands is desired, return false. Otherwise, + // return true. + // |cmd| is the current command, and |offset| is the location relative to the + // beginning of the file (not header) where the command was read. If |swap| + // is set, then any command data (other than the returned load_command) should + // be swapped when read + typedef bool (*LoadCommandCallback)(MachoWalker *walker, load_command *cmd, + off_t offset, bool swap, void *context); + + MachoWalker(const char *path, LoadCommandCallback callback, void *context); + MachoWalker(void *memory, size_t size, LoadCommandCallback callback, + void *context); + ~MachoWalker(); + + // Begin walking the header for |cpu_type|. If |cpu_type| is 0, then the + // native cpu type is used. Otherwise, accepted values are listed in + // /usr/include/mach/machine.h (e.g., CPU_TYPE_X86 or CPU_TYPE_POWERPC). + // Returns false if opening the file failed or if the |cpu_type| is not + // present in the file. + bool WalkHeader(int cpu_type); + + // Locate (if any) the header offset for |cpu_type| and return in |offset|. + // Return true if found, false otherwise. + bool FindHeader(int cpu_type, off_t &offset); + + // Read |size| bytes from the opened file at |offset| into |buffer| + bool ReadBytes(void *buffer, size_t size, off_t offset); + + // Return the current header and header offset + bool CurrentHeader(struct mach_header_64 *header, off_t *offset); + + private: + // Validate the |cpu_type| + int ValidateCPUType(int cpu_type); + + // Process an individual header starting at |offset| from the start of the + // file. Return true if successful, false otherwise. + bool WalkHeaderAtOffset(off_t offset); + bool WalkHeader64AtOffset(off_t offset); + + // Bottleneck for walking the load commands + bool WalkHeaderCore(off_t offset, uint32_t number_of_commands, bool swap); + + // File descriptor to the opened file + int file_; + + // Memory location to read from. + void *memory_; + + // Size of the memory segment we can read from. + size_t memory_size_; + + // User specified callback & context + LoadCommandCallback callback_; + void *callback_context_; + + // Current header, size, and offset. The mach_header_64 is used for both + // 32-bit and 64-bit headers because they only differ in their last field + // (reserved). By adding the |current_header_size_| and the + // |current_header_offset_|, you can determine the offset in the file just + // after the header. + struct mach_header_64 *current_header_; + unsigned long current_header_size_; + off_t current_header_offset_; + + private: + MachoWalker(const MachoWalker &); + MachoWalker &operator=(const MachoWalker &); +}; + +} // namespace MacFileUtilities + +#endif // COMMON_MAC_MACHO_WALKER_H__ diff --git a/3rdParty/Breakpad/src/common/mac/scoped_task_suspend-inl.h b/3rdParty/Breakpad/src/common/mac/scoped_task_suspend-inl.h new file mode 100644 index 0000000..d6d1bef --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/scoped_task_suspend-inl.h @@ -0,0 +1,56 @@ +// Copyright (c) 2010 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Inline implementation of ScopedTaskSuspend, which suspends a Mach +// task for the duration of its scope. + +#ifndef GOOGLE_BREAKPAD_COMMON_MAC_SCOPED_TASK_SUSPEND_H_ +#define GOOGLE_BREAKPAD_COMMON_MAC_SCOPED_TASK_SUSPEND_H_ + +#include <mach/mach.h> + +namespace google_breakpad { + +class ScopedTaskSuspend { + public: + explicit ScopedTaskSuspend(mach_port_t target) : target_(target) { + task_suspend(target_); + } + + ~ScopedTaskSuspend() { + task_resume(target_); + } + + private: + mach_port_t target_; +}; + +} // namespace google_breakpad + +#endif // GOOGLE_BREAKPAD_COMMON_MAC_SCOPED_TASK_SUSPEND_H_ diff --git a/3rdParty/Breakpad/src/common/mac/string_utilities.cc b/3rdParty/Breakpad/src/common/mac/string_utilities.cc new file mode 100644 index 0000000..e1f63a9 --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/string_utilities.cc @@ -0,0 +1,84 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "processor/scoped_ptr.h" +#include "common/mac/string_utilities.h" + +namespace MacStringUtils { + +using google_breakpad::scoped_array; + +std::string ConvertToString(CFStringRef str) { + CFIndex length = CFStringGetLength(str); + std::string result; + + if (!length) + return result; + + CFIndex maxUTF8Length = + CFStringGetMaximumSizeForEncoding(length, kCFStringEncodingUTF8); + scoped_array<UInt8> buffer(new UInt8[maxUTF8Length + 1]); + CFIndex actualUTF8Length; + CFStringGetBytes(str, CFRangeMake(0, length), kCFStringEncodingUTF8, 0, + false, buffer.get(), maxUTF8Length, &actualUTF8Length); + buffer[actualUTF8Length] = 0; + result.assign((const char *)buffer.get()); + + return result; +} + +unsigned int IntegerValueAtIndex(string &str, unsigned int idx) { + string digits("0123456789"), temp; + size_t start = 0; + size_t end; + size_t found = 0; + unsigned int result = 0; + + for (; found <= idx; ++found) { + end = str.find_first_not_of(digits, start); + + if (end == string::npos) + end = str.size(); + + temp = str.substr(start, end - start); + + if (found == idx) { + result = atoi(temp.c_str()); + } + + start = str.find_first_of(digits, end + 1); + + if (start == string::npos) + break; + } + + return result; +} + +} // namespace MacStringUtils diff --git a/3rdParty/Breakpad/src/common/mac/string_utilities.h b/3rdParty/Breakpad/src/common/mac/string_utilities.h new file mode 100644 index 0000000..6d89c83 --- /dev/null +++ b/3rdParty/Breakpad/src/common/mac/string_utilities.h @@ -0,0 +1,52 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// string_utilities.h: Utilities for strings for Mac platform + +#ifndef COMMON_MAC_STRING_UTILITIES_H__ +#define COMMON_MAC_STRING_UTILITIES_H__ + +#include <CoreFoundation/CoreFoundation.h> + +#include <string> + +namespace MacStringUtils { + +using std::string; + +// Convert a CoreFoundation string into a std::string +string ConvertToString(CFStringRef str); + +// Return the idx'th decimal integer in str, separated by non-decimal-digits +// E.g., str = 10.4.8, idx = 1 -> 4 +unsigned int IntegerValueAtIndex(string &str, unsigned int idx); + +} // namespace MacStringUtils + +#endif // COMMON_MAC_STRING_UTILITIES_H__ diff --git a/3rdParty/Breakpad/src/common/md5.cc b/3rdParty/Breakpad/src/common/md5.cc new file mode 100644 index 0000000..bccf61c --- /dev/null +++ b/3rdParty/Breakpad/src/common/md5.cc @@ -0,0 +1,251 @@ +/* + * written by Colin Plumb in 1993, no copyright is claimed. + * This code is in the public domain; do with it what you wish. + * + * Equivalent code is available from RSA Data Security, Inc. + * This code has been tested against that, and is equivalent, + * except that you don't need to include two pages of legalese + * with every copy. + * + * To compute the message digest of a chunk of bytes, declare an + * MD5Context structure, pass it to MD5Init, call MD5Update as + * needed on buffers full of bytes, and then call MD5Final, which + * will fill a supplied 16-byte array with the digest. + */ + +#include <string.h> + +#include "common/md5.h" + +namespace google_breakpad { + +#ifndef WORDS_BIGENDIAN +#define byteReverse(buf, len) /* Nothing */ +#else +/* + * Note: this code is harmless on little-endian machines. + */ +static void byteReverse(unsigned char *buf, unsigned longs) +{ + u32 t; + do { + t = (u32) ((unsigned) buf[3] << 8 | buf[2]) << 16 | + ((unsigned) buf[1] << 8 | buf[0]); + *(u32 *) buf = t; + buf += 4; + } while (--longs); +} +#endif + +static void MD5Transform(u32 buf[4], u32 const in[16]); + +/* + * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious + * initialization constants. + */ +void MD5Init(struct MD5Context *ctx) +{ + ctx->buf[0] = 0x67452301; + ctx->buf[1] = 0xefcdab89; + ctx->buf[2] = 0x98badcfe; + ctx->buf[3] = 0x10325476; + + ctx->bits[0] = 0; + ctx->bits[1] = 0; +} + +/* + * Update context to reflect the concatenation of another buffer full + * of bytes. + */ +void MD5Update(struct MD5Context *ctx, unsigned char const *buf, unsigned len) +{ + u32 t; + + /* Update bitcount */ + + t = ctx->bits[0]; + if ((ctx->bits[0] = t + ((u32) len << 3)) < t) + ctx->bits[1]++; /* Carry from low to high */ + ctx->bits[1] += len >> 29; + + t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */ + + /* Handle any leading odd-sized chunks */ + + if (t) { + unsigned char *p = (unsigned char *) ctx->in + t; + + t = 64 - t; + if (len < t) { + memcpy(p, buf, len); + return; + } + memcpy(p, buf, t); + byteReverse(ctx->in, 16); + MD5Transform(ctx->buf, (u32 *) ctx->in); + buf += t; + len -= t; + } + /* Process data in 64-byte chunks */ + + while (len >= 64) { + memcpy(ctx->in, buf, 64); + byteReverse(ctx->in, 16); + MD5Transform(ctx->buf, (u32 *) ctx->in); + buf += 64; + len -= 64; + } + + /* Handle any remaining bytes of data. */ + + memcpy(ctx->in, buf, len); +} + +/* + * Final wrapup - pad to 64-byte boundary with the bit pattern + * 1 0* (64-bit count of bits processed, MSB-first) + */ +void MD5Final(unsigned char digest[16], struct MD5Context *ctx) +{ + unsigned count; + unsigned char *p; + + /* Compute number of bytes mod 64 */ + count = (ctx->bits[0] >> 3) & 0x3F; + + /* Set the first char of padding to 0x80. This is safe since there is + always at least one byte free */ + p = ctx->in + count; + *p++ = 0x80; + + /* Bytes of padding needed to make 64 bytes */ + count = 64 - 1 - count; + + /* Pad out to 56 mod 64 */ + if (count < 8) { + /* Two lots of padding: Pad the first block to 64 bytes */ + memset(p, 0, count); + byteReverse(ctx->in, 16); + MD5Transform(ctx->buf, (u32 *) ctx->in); + + /* Now fill the next block with 56 bytes */ + memset(ctx->in, 0, 56); + } else { + /* Pad block to 56 bytes */ + memset(p, 0, count - 8); + } + byteReverse(ctx->in, 14); + + /* Append length in bits and transform */ + ((u32 *) ctx->in)[14] = ctx->bits[0]; + ((u32 *) ctx->in)[15] = ctx->bits[1]; + + MD5Transform(ctx->buf, (u32 *) ctx->in); + byteReverse((unsigned char *) ctx->buf, 4); + memcpy(digest, ctx->buf, 16); + memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */ +} + +/* The four core functions - F1 is optimized somewhat */ + +/* #define F1(x, y, z) (x & y | ~x & z) */ +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) + +/* This is the central step in the MD5 algorithm. */ +#define MD5STEP(f, w, x, y, z, data, s) \ + ( w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x ) + +/* + * The core of the MD5 algorithm, this alters an existing MD5 hash to + * reflect the addition of 16 longwords of new data. MD5Update blocks + * the data and converts bytes into longwords for this routine. + */ +static void MD5Transform(u32 buf[4], u32 const in[16]) +{ + register u32 a, b, c, d; + + a = buf[0]; + b = buf[1]; + c = buf[2]; + d = buf[3]; + + MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); + MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); + MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); + MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); + MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); + MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); + MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); + MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); + MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); + MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); + MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); + MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); + MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); + MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); + MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; +} + +} // namespace google_breakpad + diff --git a/3rdParty/Breakpad/src/common/md5.h b/3rdParty/Breakpad/src/common/md5.h new file mode 100644 index 0000000..e96521e --- /dev/null +++ b/3rdParty/Breakpad/src/common/md5.h @@ -0,0 +1,27 @@ +// Copyright 2007 Google Inc. All Rights Reserved. +// Author: liuli@google.com (Liu Li) +#ifndef COMMON_MD5_H__ +#define COMMON_MD5_H__ + +#include <stdint.h> + +namespace google_breakpad { + +typedef uint32_t u32; +typedef uint8_t u8; + +struct MD5Context { + u32 buf[4]; + u32 bits[2]; + u8 in[64]; +}; + +void MD5Init(struct MD5Context *ctx); + +void MD5Update(struct MD5Context *ctx, unsigned char const *buf, unsigned len); + +void MD5Final(unsigned char digest[16], struct MD5Context *ctx); + +} // namespace google_breakpad + +#endif // COMMON_MD5_H__ diff --git a/3rdParty/Breakpad/src/common/memory.h b/3rdParty/Breakpad/src/common/memory.h new file mode 100644 index 0000000..e90bd52 --- /dev/null +++ b/3rdParty/Breakpad/src/common/memory.h @@ -0,0 +1,218 @@ +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef GOOGLE_BREAKPAD_COMMON_MEMORY_H_ +#define GOOGLE_BREAKPAD_COMMON_MEMORY_H_ + +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/mman.h> + +#ifdef __APPLE__ +#define sys_mmap mmap +#define sys_mmap2 mmap +#define sys_munmap munmap +#define MAP_ANONYMOUS MAP_ANON +#else +#include "third_party/lss/linux_syscall_support.h" +#endif + +namespace google_breakpad { + +// This is very simple allocator which fetches pages from the kernel directly. +// Thus, it can be used even when the heap may be corrupted. +// +// There is no free operation. The pages are only freed when the object is +// destroyed. +class PageAllocator { + public: + PageAllocator() + : page_size_(getpagesize()), + last_(NULL), + current_page_(NULL), + page_offset_(0) { + } + + ~PageAllocator() { + FreeAll(); + } + + void *Alloc(unsigned bytes) { + if (!bytes) + return NULL; + + if (current_page_ && page_size_ - page_offset_ >= bytes) { + uint8_t *const ret = current_page_ + page_offset_; + page_offset_ += bytes; + if (page_offset_ == page_size_) { + page_offset_ = 0; + current_page_ = NULL; + } + + return ret; + } + + const unsigned pages = + (bytes + sizeof(PageHeader) + page_size_ - 1) / page_size_; + uint8_t *const ret = GetNPages(pages); + if (!ret) + return NULL; + + page_offset_ = (page_size_ - (page_size_ * pages - (bytes + sizeof(PageHeader)))) % page_size_; + current_page_ = page_offset_ ? ret + page_size_ * (pages - 1) : NULL; + + return ret + sizeof(PageHeader); + } + + private: + uint8_t *GetNPages(unsigned num_pages) { +#ifdef __x86_64 + void *a = sys_mmap(NULL, page_size_ * num_pages, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +#else + void *a = sys_mmap2(NULL, page_size_ * num_pages, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +#endif + if (a == MAP_FAILED) + return NULL; + + struct PageHeader *header = reinterpret_cast<PageHeader*>(a); + header->next = last_; + header->num_pages = num_pages; + last_ = header; + + return reinterpret_cast<uint8_t*>(a); + } + + void FreeAll() { + PageHeader *next; + + for (PageHeader *cur = last_; cur; cur = next) { + next = cur->next; + sys_munmap(cur, cur->num_pages * page_size_); + } + } + + struct PageHeader { + PageHeader *next; // pointer to the start of the next set of pages. + unsigned num_pages; // the number of pages in this set. + }; + + const unsigned page_size_; + PageHeader *last_; + uint8_t *current_page_; + unsigned page_offset_; +}; + +// A wasteful vector is like a normal std::vector, except that it's very much +// simplier and it allocates memory from a PageAllocator. It's wasteful +// because, when resizing, it always allocates a whole new array since the +// PageAllocator doesn't support realloc. +template<class T> +class wasteful_vector { + public: + wasteful_vector(PageAllocator *allocator, unsigned size_hint = 16) + : allocator_(allocator), + a_((T*) allocator->Alloc(sizeof(T) * size_hint)), + allocated_(size_hint), + used_(0) { + } + + T& back() { + return a_[used_ - 1]; + } + + const T& back() const { + return a_[used_ - 1]; + } + + bool empty() const { + return used_ == 0; + } + + void push_back(const T& new_element) { + if (used_ == allocated_) + Realloc(allocated_ * 2); + a_[used_++] = new_element; + } + + size_t size() const { + return used_; + } + + void resize(unsigned sz, T c = T()) { + // No need to test "sz >= 0", as "sz" is unsigned. + if (sz <= used_) { + used_ = sz; + } else { + unsigned a = allocated_; + if (sz > a) { + while (sz > a) { + a *= 2; + } + Realloc(a); + } + while (sz > used_) { + a_[used_++] = c; + } + } + } + + T& operator[](size_t index) { + return a_[index]; + } + + const T& operator[](size_t index) const { + return a_[index]; + } + + private: + void Realloc(unsigned new_size) { + T *new_array = + reinterpret_cast<T*>(allocator_->Alloc(sizeof(T) * new_size)); + memcpy(new_array, a_, used_ * sizeof(T)); + a_ = new_array; + allocated_ = new_size; + } + + PageAllocator *const allocator_; + T *a_; // pointer to an array of |allocated_| elements. + unsigned allocated_; // size of |a_|, in elements. + unsigned used_; // number of used slots in |a_|. +}; + +} // namespace google_breakpad + +inline void* operator new(size_t nbytes, + google_breakpad::PageAllocator& allocator) { + return allocator.Alloc(nbytes); +} + +#endif // GOOGLE_BREAKPAD_COMMON_MEMORY_H_ diff --git a/3rdParty/Breakpad/src/common/module.cc b/3rdParty/Breakpad/src/common/module.cc new file mode 100644 index 0000000..4e257d1 --- /dev/null +++ b/3rdParty/Breakpad/src/common/module.cc @@ -0,0 +1,294 @@ +// Copyright (c) 2011 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// module.cc: Implement google_breakpad::Module. See module.h. + +#include "common/module.h" + +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <string.h> + +#include <iostream> +#include <utility> + +namespace google_breakpad { + +using std::dec; +using std::endl; +using std::hex; + + +Module::Module(const string &name, const string &os, + const string &architecture, const string &id) : + name_(name), + os_(os), + architecture_(architecture), + id_(id), + load_address_(0) { } + +Module::~Module() { + for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it) + delete it->second; + for (FunctionSet::iterator it = functions_.begin(); + it != functions_.end(); ++it) { + delete *it; + } + for (vector<StackFrameEntry *>::iterator it = stack_frame_entries_.begin(); + it != stack_frame_entries_.end(); ++it) { + delete *it; + } + for (ExternSet::iterator it = externs_.begin(); it != externs_.end(); ++it) + delete *it; +} + +void Module::SetLoadAddress(Address address) { + load_address_ = address; +} + +void Module::AddFunction(Function *function) { + // FUNC lines must not hold an empty name, so catch the problem early if + // callers try to add one. + assert(!function->name.empty()); + std::pair<FunctionSet::iterator,bool> ret = functions_.insert(function); + if (!ret.second) { + // Free the duplicate that was not inserted because this Module + // now owns it. + delete function; + } +} + +void Module::AddFunctions(vector<Function *>::iterator begin, + vector<Function *>::iterator end) { + for (vector<Function *>::iterator it = begin; it != end; ++it) + AddFunction(*it); +} + +void Module::AddStackFrameEntry(StackFrameEntry *stack_frame_entry) { + stack_frame_entries_.push_back(stack_frame_entry); +} + +void Module::AddExtern(Extern *ext) { + std::pair<ExternSet::iterator,bool> ret = externs_.insert(ext); + if (!ret.second) { + // Free the duplicate that was not inserted because this Module + // now owns it. + delete ext; + } +} + +void Module::GetFunctions(vector<Function *> *vec, + vector<Function *>::iterator i) { + vec->insert(i, functions_.begin(), functions_.end()); +} + +void Module::GetExterns(vector<Extern *> *vec, + vector<Extern *>::iterator i) { + vec->insert(i, externs_.begin(), externs_.end()); +} + +Module::File *Module::FindFile(const string &name) { + // A tricky bit here. The key of each map entry needs to be a + // pointer to the entry's File's name string. This means that we + // can't do the initial lookup with any operation that would create + // an empty entry for us if the name isn't found (like, say, + // operator[] or insert do), because such a created entry's key will + // be a pointer the string passed as our argument. Since the key of + // a map's value type is const, we can't fix it up once we've + // created our file. lower_bound does the lookup without doing an + // insertion, and returns a good hint iterator to pass to insert. + // Our "destiny" is where we belong, whether we're there or not now. + FileByNameMap::iterator destiny = files_.lower_bound(&name); + if (destiny == files_.end() + || *destiny->first != name) { // Repeated string comparison, boo hoo. + File *file = new File; + file->name = name; + file->source_id = -1; + destiny = files_.insert(destiny, + FileByNameMap::value_type(&file->name, file)); + } + return destiny->second; +} + +Module::File *Module::FindFile(const char *name) { + string name_string = name; + return FindFile(name_string); +} + +Module::File *Module::FindExistingFile(const string &name) { + FileByNameMap::iterator it = files_.find(&name); + return (it == files_.end()) ? NULL : it->second; +} + +void Module::GetFiles(vector<File *> *vec) { + vec->clear(); + for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it) + vec->push_back(it->second); +} + +void Module::GetStackFrameEntries(vector<StackFrameEntry *> *vec) { + *vec = stack_frame_entries_; +} + +void Module::AssignSourceIds() { + // First, give every source file an id of -1. + for (FileByNameMap::iterator file_it = files_.begin(); + file_it != files_.end(); ++file_it) { + file_it->second->source_id = -1; + } + + // Next, mark all files actually cited by our functions' line number + // info, by setting each one's source id to zero. + for (FunctionSet::const_iterator func_it = functions_.begin(); + func_it != functions_.end(); ++func_it) { + Function *func = *func_it; + for (vector<Line>::iterator line_it = func->lines.begin(); + line_it != func->lines.end(); ++line_it) + line_it->file->source_id = 0; + } + + // Finally, assign source ids to those files that have been marked. + // We could have just assigned source id numbers while traversing + // the line numbers, but doing it this way numbers the files in + // lexicographical order by name, which is neat. + int next_source_id = 0; + for (FileByNameMap::iterator file_it = files_.begin(); + file_it != files_.end(); ++file_it) { + if (!file_it->second->source_id) + file_it->second->source_id = next_source_id++; + } +} + +bool Module::ReportError() { + fprintf(stderr, "error writing symbol file: %s\n", + strerror(errno)); + return false; +} + +bool Module::WriteRuleMap(const RuleMap &rule_map, std::ostream &stream) { + for (RuleMap::const_iterator it = rule_map.begin(); + it != rule_map.end(); ++it) { + if (it != rule_map.begin()) + stream << ' '; + stream << it->first << ": " << it->second; + } + return stream.good(); +} + +bool Module::Write(std::ostream &stream, bool cfi) { + stream << "MODULE " << os_ << " " << architecture_ << " " + << id_ << " " << name_ << endl; + if (!stream.good()) + return ReportError(); + + AssignSourceIds(); + + // Write out files. + for (FileByNameMap::iterator file_it = files_.begin(); + file_it != files_.end(); ++file_it) { + File *file = file_it->second; + if (file->source_id >= 0) { + stream << "FILE " << file->source_id << " " << file->name << endl; + if (!stream.good()) + return ReportError(); + } + } + + // Write out functions and their lines. + for (FunctionSet::const_iterator func_it = functions_.begin(); + func_it != functions_.end(); ++func_it) { + Function *func = *func_it; + stream << "FUNC " << hex + << (func->address - load_address_) << " " + << func->size << " " + << func->parameter_size << " " + << func->name << dec << endl; + + if (!stream.good()) + return ReportError(); + for (vector<Line>::iterator line_it = func->lines.begin(); + line_it != func->lines.end(); ++line_it) { + stream << hex + << (line_it->address - load_address_) << " " + << line_it->size << " " + << dec + << line_it->number << " " + << line_it->file->source_id << endl; + if (!stream.good()) + return ReportError(); + } + } + + // Write out 'PUBLIC' records. + for (ExternSet::const_iterator extern_it = externs_.begin(); + extern_it != externs_.end(); ++extern_it) { + Extern *ext = *extern_it; + stream << "PUBLIC " << hex + << (ext->address - load_address_) << " 0 " + << ext->name << dec << endl; + if (!stream.good()) + return ReportError(); + } + + if (cfi) { + // Write out 'STACK CFI INIT' and 'STACK CFI' records. + vector<StackFrameEntry *>::const_iterator frame_it; + for (frame_it = stack_frame_entries_.begin(); + frame_it != stack_frame_entries_.end(); ++frame_it) { + StackFrameEntry *entry = *frame_it; + stream << "STACK CFI INIT " << hex + << (entry->address - load_address_) << " " + << entry->size << " " << dec; + if (!stream.good() + || !WriteRuleMap(entry->initial_rules, stream)) + return ReportError(); + + stream << endl; + + // Write out this entry's delta rules as 'STACK CFI' records. + for (RuleChangeMap::const_iterator delta_it = entry->rule_changes.begin(); + delta_it != entry->rule_changes.end(); ++delta_it) { + stream << "STACK CFI " << hex + << (delta_it->first - load_address_) << " " << dec; + if (!stream.good() + || !WriteRuleMap(delta_it->second, stream)) + return ReportError(); + + stream << endl; + } + } + } + + return true; +} + +} // namespace google_breakpad diff --git a/3rdParty/Breakpad/src/common/module.h b/3rdParty/Breakpad/src/common/module.h new file mode 100644 index 0000000..cc89bba --- /dev/null +++ b/3rdParty/Breakpad/src/common/module.h @@ -0,0 +1,321 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2010 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// module.h: Define google_breakpad::Module. A Module holds debugging +// information, and can write that information out as a Breakpad +// symbol file. + +#ifndef COMMON_LINUX_MODULE_H__ +#define COMMON_LINUX_MODULE_H__ + +#include <iostream> +#include <map> +#include <set> +#include <string> +#include <vector> + +#include "common/using_std_string.h" +#include "google_breakpad/common/breakpad_types.h" + +namespace google_breakpad { + +using std::set; +using std::vector; +using std::map; + +// A Module represents the contents of a module, and supports methods +// for adding information produced by parsing STABS or DWARF data +// --- possibly both from the same file --- and then writing out the +// unified contents as a Breakpad-format symbol file. +class Module { + public: + // The type of addresses and sizes in a symbol table. + typedef u_int64_t Address; + struct File; + struct Function; + struct Line; + struct Extern; + + // Addresses appearing in File, Function, and Line structures are + // absolute, not relative to the the module's load address. That + // is, if the module were loaded at its nominal load address, the + // addresses would be correct. + + // A source file. + struct File { + // The name of the source file. + string name; + + // The file's source id. The Write member function clears this + // field and assigns source ids a fresh, so any value placed here + // before calling Write will be lost. + int source_id; + }; + + // A function. + struct Function { + // For sorting by address. (Not style-guide compliant, but it's + // stupid not to put this in the struct.) + static bool CompareByAddress(const Function *x, const Function *y) { + return x->address < y->address; + } + + // The function's name. + string name; + + // The start address and length of the function's code. + Address address, size; + + // The function's parameter size. + Address parameter_size; + + // Source lines belonging to this function, sorted by increasing + // address. + vector<Line> lines; + }; + + // A source line. + struct Line { + // For sorting by address. (Not style-guide compliant, but it's + // stupid not to put this in the struct.) + static bool CompareByAddress(const Module::Line &x, const Module::Line &y) { + return x.address < y.address; + } + + Address address, size; // The address and size of the line's code. + File *file; // The source file. + int number; // The source line number. + }; + + // An exported symbol. + struct Extern { + Address address; + string name; + }; + + // A map from register names to postfix expressions that recover + // their their values. This can represent a complete set of rules to + // follow at some address, or a set of changes to be applied to an + // extant set of rules. + typedef map<string, string> RuleMap; + + // A map from addresses to RuleMaps, representing changes that take + // effect at given addresses. + typedef map<Address, RuleMap> RuleChangeMap; + + // A range of 'STACK CFI' stack walking information. An instance of + // this structure corresponds to a 'STACK CFI INIT' record and the + // subsequent 'STACK CFI' records that fall within its range. + struct StackFrameEntry { + // The starting address and number of bytes of machine code this + // entry covers. + Address address, size; + + // The initial register recovery rules, in force at the starting + // address. + RuleMap initial_rules; + + // A map from addresses to rule changes. To find the rules in + // force at a given address, start with initial_rules, and then + // apply the changes given in this map for all addresses up to and + // including the address you're interested in. + RuleChangeMap rule_changes; + }; + + struct FunctionCompare { + bool operator() (const Function *lhs, + const Function *rhs) const { + if (lhs->address == rhs->address) + return lhs->name < rhs->name; + return lhs->address < rhs->address; + } + }; + + struct ExternCompare { + bool operator() (const Extern *lhs, + const Extern *rhs) const { + return lhs->address < rhs->address; + } + }; + + // Create a new module with the given name, operating system, + // architecture, and ID string. + Module(const string &name, const string &os, const string &architecture, + const string &id); + ~Module(); + + // Set the module's load address to LOAD_ADDRESS; addresses given + // for functions and lines will be written to the Breakpad symbol + // file as offsets from this address. Construction initializes this + // module's load address to zero: addresses written to the symbol + // file will be the same as they appear in the Function, Line, and + // StackFrameEntry structures. + // + // Note that this member function has no effect on addresses stored + // in the data added to this module; the Write member function + // simply subtracts off the load address from addresses before it + // prints them. Only the last load address given before calling + // Write is used. + void SetLoadAddress(Address load_address); + + // Add FUNCTION to the module. FUNCTION's name must not be empty. + // This module owns all Function objects added with this function: + // destroying the module destroys them as well. + void AddFunction(Function *function); + + // Add all the functions in [BEGIN,END) to the module. + // This module owns all Function objects added with this function: + // destroying the module destroys them as well. + void AddFunctions(vector<Function *>::iterator begin, + vector<Function *>::iterator end); + + // Add STACK_FRAME_ENTRY to the module. + // This module owns all StackFrameEntry objects added with this + // function: destroying the module destroys them as well. + void AddStackFrameEntry(StackFrameEntry *stack_frame_entry); + + // Add PUBLIC to the module. + // This module owns all Extern objects added with this function: + // destroying the module destroys them as well. + void AddExtern(Extern *ext); + + // If this module has a file named NAME, return a pointer to it. If + // it has none, then create one and return a pointer to the new + // file. This module owns all File objects created using these + // functions; destroying the module destroys them as well. + File *FindFile(const string &name); + File *FindFile(const char *name); + + // If this module has a file named NAME, return a pointer to it. + // Otherwise, return NULL. + File *FindExistingFile(const string &name); + + // Insert pointers to the functions added to this module at I in + // VEC. The pointed-to Functions are still owned by this module. + // (Since this is effectively a copy of the function list, this is + // mostly useful for testing; other uses should probably get a more + // appropriate interface.) + void GetFunctions(vector<Function *> *vec, vector<Function *>::iterator i); + + // Insert pointers to the externs added to this module at I in + // VEC. The pointed-to Externs are still owned by this module. + // (Since this is effectively a copy of the extern list, this is + // mostly useful for testing; other uses should probably get a more + // appropriate interface.) + void GetExterns(vector<Extern *> *vec, vector<Extern *>::iterator i); + + // Clear VEC and fill it with pointers to the Files added to this + // module, sorted by name. The pointed-to Files are still owned by + // this module. (Since this is effectively a copy of the file list, + // this is mostly useful for testing; other uses should probably get + // a more appropriate interface.) + void GetFiles(vector<File *> *vec); + + // Clear VEC and fill it with pointers to the StackFrameEntry + // objects that have been added to this module. (Since this is + // effectively a copy of the stack frame entry list, this is mostly + // useful for testing; other uses should probably get + // a more appropriate interface.) + void GetStackFrameEntries(vector<StackFrameEntry *> *vec); + + // Find those files in this module that are actually referred to by + // functions' line number data, and assign them source id numbers. + // Set the source id numbers for all other files --- unused by the + // source line data --- to -1. We do this before writing out the + // symbol file, at which point we omit any unused files. + void AssignSourceIds(); + + // Call AssignSourceIds, and write this module to STREAM in the + // breakpad symbol format. Return true if all goes well, or false if + // an error occurs. This method writes out: + // - a header based on the values given to the constructor, + // - the source files added via FindFile, + // - the functions added via AddFunctions, each with its lines, + // - all public records, + // - and if CFI is true, all CFI records. + // Addresses in the output are all relative to the load address + // established by SetLoadAddress. + bool Write(std::ostream &stream, bool cfi); + + private: + // Report an error that has occurred writing the symbol file, using + // errno to find the appropriate cause. Return false. + static bool ReportError(); + + // Write RULE_MAP to STREAM, in the form appropriate for 'STACK CFI' + // records, without a final newline. Return true if all goes well; + // if an error occurs, return false, and leave errno set. + static bool WriteRuleMap(const RuleMap &rule_map, std::ostream &stream); + + // Module header entries. + string name_, os_, architecture_, id_; + + // The module's nominal load address. Addresses for functions and + // lines are absolute, assuming the module is loaded at this + // address. + Address load_address_; + + // Relation for maps whose keys are strings shared with some other + // structure. + struct CompareStringPtrs { + bool operator()(const string *x, const string *y) { return *x < *y; } + }; + + // A map from filenames to File structures. The map's keys are + // pointers to the Files' names. + typedef map<const string *, File *, CompareStringPtrs> FileByNameMap; + + // A set containing Function structures, sorted by address. + typedef set<Function *, FunctionCompare> FunctionSet; + + // A set containing Extern structures, sorted by address. + typedef set<Extern *, ExternCompare> ExternSet; + + // The module owns all the files and functions that have been added + // to it; destroying the module frees the Files and Functions these + // point to. + FileByNameMap files_; // This module's source files. + FunctionSet functions_; // This module's functions. + + // The module owns all the call frame info entries that have been + // added to it. + vector<StackFrameEntry *> stack_frame_entries_; + + // The module owns all the externs that have been added to it; + // destroying the module frees the Externs these point to. + ExternSet externs_; +}; + +} // namespace google_breakpad + +#endif // COMMON_LINUX_MODULE_H__ diff --git a/3rdParty/Breakpad/src/common/stabs_reader.cc b/3rdParty/Breakpad/src/common/stabs_reader.cc new file mode 100644 index 0000000..6019fc7 --- /dev/null +++ b/3rdParty/Breakpad/src/common/stabs_reader.cc @@ -0,0 +1,315 @@ +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// This file implements the google_breakpad::StabsReader class. +// See stabs_reader.h. + +#include "common/stabs_reader.h" + +#include <assert.h> +#include <stab.h> +#include <string.h> + +#include <string> + +#include "common/using_std_string.h" + +using std::vector; + +namespace google_breakpad { + +StabsReader::EntryIterator::EntryIterator(const ByteBuffer *buffer, + bool big_endian, size_t value_size) + : value_size_(value_size), cursor_(buffer, big_endian) { + // Actually, we could handle weird sizes just fine, but they're + // probably mistakes --- expressed in bits, say. + assert(value_size == 4 || value_size == 8); + entry_.index = 0; + Fetch(); +} + +void StabsReader::EntryIterator::Fetch() { + cursor_ + .Read(4, false, &entry_.name_offset) + .Read(1, false, &entry_.type) + .Read(1, false, &entry_.other) + .Read(2, false, &entry_.descriptor) + .Read(value_size_, false, &entry_.value); + entry_.at_end = !cursor_; +} + +StabsReader::StabsReader(const uint8_t *stab, size_t stab_size, + const uint8_t *stabstr, size_t stabstr_size, + bool big_endian, size_t value_size, bool unitized, + StabsHandler *handler) + : entries_(stab, stab_size), + strings_(stabstr, stabstr_size), + iterator_(&entries_, big_endian, value_size), + unitized_(unitized), + handler_(handler), + string_offset_(0), + next_cu_string_offset_(0), + current_source_file_(NULL) { } + +const char *StabsReader::SymbolString() { + ptrdiff_t offset = string_offset_ + iterator_->name_offset; + if (offset < 0 || (size_t) offset >= strings_.Size()) { + handler_->Warning("symbol %d: name offset outside the string section\n", + iterator_->index); + // Return our null string, to keep our promise about all names being + // taken from the string section. + offset = 0; + } + return reinterpret_cast<const char *>(strings_.start + offset); +} + +bool StabsReader::Process() { + while (!iterator_->at_end) { + if (iterator_->type == N_SO) { + if (! ProcessCompilationUnit()) + return false; + } else if (iterator_->type == N_UNDF && unitized_) { + // In unitized STABS (including Linux STABS, and pretty much anything + // else that puts STABS data in sections), at the head of each + // compilation unit's entries there is an N_UNDF stab giving the + // number of symbols in the compilation unit, and the number of bytes + // that compilation unit's strings take up in the .stabstr section. + // Each CU's strings are separate; the n_strx values are offsets + // within the current CU's portion of the .stabstr section. + // + // As an optimization, the GNU linker combines all the + // compilation units into one, with a single N_UNDF at the + // beginning. However, other linkers, like Gold, do not perform + // this optimization. + string_offset_ = next_cu_string_offset_; + next_cu_string_offset_ = iterator_->value; + ++iterator_; + } +#if defined(HAVE_MACH_O_NLIST_H) + // Export symbols in Mach-O binaries look like this. + // This is necessary in order to be able to dump symbols + // from OS X system libraries. + else if ((iterator_->type & N_STAB) == 0 && + (iterator_->type & N_TYPE) == N_SECT) { + ProcessExtern(); + } +#endif + else { + ++iterator_; + } + } + return true; +} + +bool StabsReader::ProcessCompilationUnit() { + assert(!iterator_->at_end && iterator_->type == N_SO); + + // There may be an N_SO entry whose name ends with a slash, + // indicating the directory in which the compilation occurred. + // The build directory defaults to NULL. + const char *build_directory = NULL; + { + const char *name = SymbolString(); + if (name[0] && name[strlen(name) - 1] == '/') { + build_directory = name; + ++iterator_; + } + } + + // We expect to see an N_SO entry with a filename next, indicating + // the start of the compilation unit. + { + if (iterator_->at_end || iterator_->type != N_SO) + return true; + const char *name = SymbolString(); + if (name[0] == '\0') { + // This seems to be a stray end-of-compilation-unit marker; + // consume it, but don't report the end, since we didn't see a + // beginning. + ++iterator_; + return true; + } + current_source_file_ = name; + } + + if (! handler_->StartCompilationUnit(current_source_file_, + iterator_->value, + build_directory)) + return false; + + ++iterator_; + + // The STABS documentation says that some compilers may emit + // additional N_SO entries with names immediately following the + // first, and that they should be ignored. However, the original + // Breakpad STABS reader doesn't ignore them, so we won't either. + + // Process the body of the compilation unit, up to the next N_SO. + while (!iterator_->at_end && iterator_->type != N_SO) { + if (iterator_->type == N_FUN) { + if (! ProcessFunction()) + return false; + } else if (iterator_->type == N_SLINE) { + // Mac OS X STABS place SLINE records before functions. + Line line; + // The value of an N_SLINE entry that appears outside a function is + // the absolute address of the line. + line.address = iterator_->value; + line.filename = current_source_file_; + // The n_desc of a N_SLINE entry is the line number. It's a + // signed 16-bit field; line numbers from 32768 to 65535 are + // stored as n-65536. + line.number = (uint16_t) iterator_->descriptor; + queued_lines_.push_back(line); + ++iterator_; + } else if (iterator_->type == N_SOL) { + current_source_file_ = SymbolString(); + ++iterator_; + } else { + // Ignore anything else. + ++iterator_; + } + } + + // An N_SO with an empty name indicates the end of the compilation + // unit. Default to zero. + uint64_t ending_address = 0; + if (!iterator_->at_end) { + assert(iterator_->type == N_SO); + const char *name = SymbolString(); + if (name[0] == '\0') { + ending_address = iterator_->value; + ++iterator_; + } + } + + if (! handler_->EndCompilationUnit(ending_address)) + return false; + + queued_lines_.clear(); + + return true; +} + +bool StabsReader::ProcessFunction() { + assert(!iterator_->at_end && iterator_->type == N_FUN); + + uint64_t function_address = iterator_->value; + // The STABS string for an N_FUN entry is the name of the function, + // followed by a colon, followed by type information for the + // function. We want to pass the name alone to StartFunction. + const char *stab_string = SymbolString(); + const char *name_end = strchr(stab_string, ':'); + if (! name_end) + name_end = stab_string + strlen(stab_string); + string name(stab_string, name_end - stab_string); + if (! handler_->StartFunction(name, function_address)) + return false; + ++iterator_; + + // If there were any SLINE records given before the function, report them now. + for (vector<Line>::const_iterator it = queued_lines_.begin(); + it != queued_lines_.end(); it++) { + if (!handler_->Line(it->address, it->filename, it->number)) + return false; + } + queued_lines_.clear(); + + while (!iterator_->at_end) { + if (iterator_->type == N_SO || iterator_->type == N_FUN) + break; + else if (iterator_->type == N_SLINE) { + // The value of an N_SLINE entry is the offset of the line from + // the function's start address. + uint64_t line_address = function_address + iterator_->value; + // The n_desc of a N_SLINE entry is the line number. It's a + // signed 16-bit field; line numbers from 32768 to 65535 are + // stored as n-65536. + uint16_t line_number = iterator_->descriptor; + if (! handler_->Line(line_address, current_source_file_, line_number)) + return false; + ++iterator_; + } else if (iterator_->type == N_SOL) { + current_source_file_ = SymbolString(); + ++iterator_; + } else + // Ignore anything else. + ++iterator_; + } + + // We've reached the end of the function. See if we can figure out its + // ending address. + uint64_t ending_address = 0; + if (!iterator_->at_end) { + assert(iterator_->type == N_SO || iterator_->type == N_FUN); + if (iterator_->type == N_FUN) { + const char *symbol_name = SymbolString(); + if (symbol_name[0] == '\0') { + // An N_FUN entry with no name is a terminator for this function; + // its value is the function's size. + ending_address = function_address + iterator_->value; + ++iterator_; + } else { + // An N_FUN entry with a name is the next function, and we can take + // its value as our ending address. Don't advance the iterator, as + // we'll use this symbol to start the next function as well. + ending_address = iterator_->value; + } + } else { + // An N_SO entry could be an end-of-compilation-unit marker, or the + // start of the next compilation unit, but in either case, its value + // is our ending address. We don't advance the iterator; + // ProcessCompilationUnit will decide what to do with this symbol. + ending_address = iterator_->value; + } + } + + if (! handler_->EndFunction(ending_address)) + return false; + + return true; +} + +bool StabsReader::ProcessExtern() { +#if defined(HAVE_MACH_O_NLIST_H) + assert(!iterator_->at_end && + (iterator_->type & N_STAB) == 0 && + (iterator_->type & N_TYPE) == N_SECT); +#endif + + // TODO(mark): only do symbols in the text section? + if (!handler_->Extern(SymbolString(), iterator_->value)) + return false; + + ++iterator_; + return true; +} + +} // namespace google_breakpad diff --git a/3rdParty/Breakpad/src/common/stabs_reader.h b/3rdParty/Breakpad/src/common/stabs_reader.h new file mode 100644 index 0000000..d89afc0 --- /dev/null +++ b/3rdParty/Breakpad/src/common/stabs_reader.h @@ -0,0 +1,326 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// stabs_reader.h: Define StabsReader, a parser for STABS debugging +// information. A description of the STABS debugging format can be +// found at: +// +// http://sourceware.org/gdb/current/onlinedocs/stabs_toc.html +// +// The comments here assume you understand the format. +// +// This parser can handle big-endian and little-endian data, and the symbol +// values may be either 32 or 64 bits long. It handles both STABS in +// sections (as used on Linux) and STABS appearing directly in an +// a.out-like symbol table (as used in Darwin OS X Mach-O files). + +#ifndef COMMON_STABS_READER_H__ +#define COMMON_STABS_READER_H__ + +#include <stddef.h> +#include <stdint.h> + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#ifdef HAVE_A_OUT_H +#include <a.out.h> +#endif +#ifdef HAVE_MACH_O_NLIST_H +#include <mach-o/nlist.h> +#endif + +#include <string> +#include <vector> + +#include "common/byte_cursor.h" +#include "common/using_std_string.h" + +namespace google_breakpad { + +class StabsHandler; + +class StabsReader { + public: + // Create a reader for the STABS debug information whose .stab section is + // being traversed by ITERATOR, and whose .stabstr section is referred to + // by STRINGS. The reader will call the member functions of HANDLER to + // report the information it finds, when the reader's 'Process' member + // function is called. + // + // BIG_ENDIAN should be true if the entries in the .stab section are in + // big-endian form, or false if they are in little-endian form. + // + // VALUE_SIZE should be either 4 or 8, indicating the size of the 'value' + // field in each entry in bytes. + // + // UNITIZED should be true if the STABS data is stored in units with + // N_UNDF headers. This is usually the case for STABS stored in sections, + // like .stab/.stabstr, and usually not the case for STABS stored in the + // actual symbol table; UNITIZED should be true when parsing Linux stabs, + // false when parsing Mac OS X STABS. For details, see: + // http://sourceware.org/gdb/current/onlinedocs/stabs/Stab-Section-Basics.html + // + // Note that, in ELF, the .stabstr section should be found using the + // 'sh_link' field of the .stab section header, not by name. + StabsReader(const uint8_t *stab, size_t stab_size, + const uint8_t *stabstr, size_t stabstr_size, + bool big_endian, size_t value_size, bool unitized, + StabsHandler *handler); + + // Process the STABS data, calling the handler's member functions to + // report what we find. While the handler functions return true, + // continue to process until we reach the end of the section. If we + // processed the entire section and all handlers returned true, + // return true. If any handler returned false, return false. + // + // This is only meant to be called once per StabsReader instance; + // resuming a prior processing pass that stopped abruptly isn't supported. + bool Process(); + + private: + + // An class for walking arrays of STABS entries. This isolates the main + // STABS reader from the exact format (size; endianness) of the entries + // themselves. + class EntryIterator { + public: + // The contents of a STABS entry, adjusted for the host's endianness, + // word size, 'struct nlist' layout, and so on. + struct Entry { + // True if this iterator has reached the end of the entry array. When + // this is set, the other members of this structure are not valid. + bool at_end; + + // The number of this entry within the list. + size_t index; + + // The current entry's name offset. This is the offset within the + // current compilation unit's strings, as establish by the N_UNDF entries. + size_t name_offset; + + // The current entry's type, 'other' field, descriptor, and value. + unsigned char type; + unsigned char other; + short descriptor; + uint64_t value; + }; + + // Create a EntryIterator walking the entries in BUFFER. Treat the + // entries as big-endian if BIG_ENDIAN is true, as little-endian + // otherwise. Assume each entry has a 'value' field whose size is + // VALUE_SIZE. + // + // This would not be terribly clean to extend to other format variations, + // but it's enough to handle Linux and Mac, and we'd like STABS to die + // anyway. + // + // For the record: on Linux, STABS entry values are always 32 bits, + // regardless of the architecture address size (don't ask me why); on + // Mac, they are 32 or 64 bits long. Oddly, the section header's entry + // size for a Linux ELF .stab section varies according to the ELF class + // from 12 to 20 even as the actual entries remain unchanged. + EntryIterator(const ByteBuffer *buffer, bool big_endian, size_t value_size); + + // Move to the next entry. This function's behavior is undefined if + // at_end() is true when it is called. + EntryIterator &operator++() { Fetch(); entry_.index++; return *this; } + + // Dereferencing this iterator produces a reference to an Entry structure + // that holds the current entry's values. The entry is owned by this + // EntryIterator, and will be invalidated at the next call to operator++. + const Entry &operator*() const { return entry_; } + const Entry *operator->() const { return &entry_; } + + private: + // Read the STABS entry at cursor_, and set entry_ appropriately. + void Fetch(); + + // The size of entries' value field, in bytes. + size_t value_size_; + + // A byte cursor traversing buffer_. + ByteCursor cursor_; + + // Values for the entry this iterator refers to. + Entry entry_; + }; + + // A source line, saved to be reported later. + struct Line { + uint64_t address; + const char *filename; + int number; + }; + + // Return the name of the current symbol. + const char *SymbolString(); + + // Process a compilation unit starting at symbol_. Return true + // to continue processing, or false to abort. + bool ProcessCompilationUnit(); + + // Process a function in current_source_file_ starting at symbol_. + // Return true to continue processing, or false to abort. + bool ProcessFunction(); + + // Process an exported function symbol. + // Return true to continue processing, or false to abort. + bool ProcessExtern(); + + // The STABS entries being parsed. + ByteBuffer entries_; + + // The string section to which the entries refer. + ByteBuffer strings_; + + // The iterator walking the STABS entries. + EntryIterator iterator_; + + // True if the data is "unitized"; see the explanation in the comment for + // StabsReader::StabsReader. + bool unitized_; + + StabsHandler *handler_; + + // The offset of the current compilation unit's strings within stabstr_. + size_t string_offset_; + + // The value string_offset_ should have for the next compilation unit, + // as established by N_UNDF entries. + size_t next_cu_string_offset_; + + // The current source file name. + const char *current_source_file_; + + // Mac OS X STABS place SLINE records before functions; we accumulate a + // vector of these until we see the FUN record, and then report them + // after the StartFunction call. + std::vector<Line> queued_lines_; +}; + +// Consumer-provided callback structure for the STABS reader. Clients +// of the STABS reader provide an instance of this structure. The +// reader then invokes the member functions of that instance to report +// the information it finds. +// +// The default definitions of the member functions do nothing, and return +// true so processing will continue. +class StabsHandler { + public: + StabsHandler() { } + virtual ~StabsHandler() { } + + // Some general notes about the handler callback functions: + + // Processing proceeds until the end of the .stabs section, or until + // one of these functions returns false. + + // The addresses given are as reported in the STABS info, without + // regard for whether the module may be loaded at different + // addresses at different times (a shared library, say). When + // processing STABS from an ELF shared library, the addresses given + // all assume the library is loaded at its nominal load address. + // They are *not* offsets from the nominal load address. If you + // want offsets, you must subtract off the library's nominal load + // address. + + // The arguments to these functions named FILENAME are all + // references to strings stored in the .stabstr section. Because + // both the Linux and Solaris linkers factor out duplicate strings + // from the .stabstr section, the consumer can assume that if two + // FILENAME values are different addresses, they represent different + // file names. + // + // Thus, it's safe to use (say) std::map<char *, ...>, which does + // string address comparisons, not string content comparisons. + // Since all the strings are in same array of characters --- the + // .stabstr section --- comparing their addresses produces + // predictable, if not lexicographically meaningful, results. + + // Begin processing a compilation unit whose main source file is + // named FILENAME, and whose base address is ADDRESS. If + // BUILD_DIRECTORY is non-NULL, it is the name of the build + // directory in which the compilation occurred. + virtual bool StartCompilationUnit(const char *filename, uint64_t address, + const char *build_directory) { + return true; + } + + // Finish processing the compilation unit. If ADDRESS is non-zero, + // it is the ending address of the compilation unit. If ADDRESS is + // zero, then the compilation unit's ending address is not + // available, and the consumer must infer it by other means. + virtual bool EndCompilationUnit(uint64_t address) { return true; } + + // Begin processing a function named NAME, whose starting address is + // ADDRESS. This function belongs to the compilation unit that was + // most recently started but not ended. + // + // Note that, unlike filenames, NAME is not a pointer into the + // .stabstr section; this is because the name as it appears in the + // STABS data is followed by type information. The value passed to + // StartFunction is the function name alone. + // + // In languages that use name mangling, like C++, NAME is mangled. + virtual bool StartFunction(const string &name, uint64_t address) { + return true; + } + + // Finish processing the function. If ADDRESS is non-zero, it is + // the ending address for the function. If ADDRESS is zero, then + // the function's ending address is not available, and the consumer + // must infer it by other means. + virtual bool EndFunction(uint64_t address) { return true; } + + // Report that the code at ADDRESS is attributable to line NUMBER of + // the source file named FILENAME. The caller must infer the ending + // address of the line. + virtual bool Line(uint64_t address, const char *filename, int number) { + return true; + } + + // Report that an exported function NAME is present at ADDRESS. + // The size of the function is unknown. + virtual bool Extern(const string &name, uint64_t address) { + return true; + } + + // Report a warning. FORMAT is a printf-like format string, + // specifying how to format the subsequent arguments. + virtual void Warning(const char *format, ...) = 0; +}; + +} // namespace google_breakpad + +#endif // COMMON_STABS_READER_H__ diff --git a/3rdParty/Breakpad/src/common/stabs_to_module.cc b/3rdParty/Breakpad/src/common/stabs_to_module.cc new file mode 100644 index 0000000..e59aebd --- /dev/null +++ b/3rdParty/Breakpad/src/common/stabs_to_module.cc @@ -0,0 +1,200 @@ +// Copyright (c) 2010 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// dump_stabs.cc --- implement the StabsToModule class. + +#include <assert.h> +#include <cxxabi.h> +#include <stdarg.h> +#include <stdio.h> + +#include <algorithm> + +#include "common/stabs_to_module.h" +#include "common/using_std_string.h" + +namespace google_breakpad { + +// Demangle using abi call. +// Older GCC may not support it. +static string Demangle(const string &mangled) { + int status = 0; + char *demangled = abi::__cxa_demangle(mangled.c_str(), NULL, NULL, &status); + if (status == 0 && demangled != NULL) { + string str(demangled); + free(demangled); + return str; + } + return string(mangled); +} + +StabsToModule::~StabsToModule() { + // Free any functions we've accumulated but not added to the module. + for (vector<Module::Function *>::const_iterator func_it = functions_.begin(); + func_it != functions_.end(); func_it++) + delete *func_it; + // Free any function that we're currently within. + delete current_function_; +} + +bool StabsToModule::StartCompilationUnit(const char *name, uint64_t address, + const char *build_directory) { + assert(!in_compilation_unit_); + in_compilation_unit_ = true; + current_source_file_name_ = name; + current_source_file_ = module_->FindFile(name); + comp_unit_base_address_ = address; + boundaries_.push_back(static_cast<Module::Address>(address)); + return true; +} + +bool StabsToModule::EndCompilationUnit(uint64_t address) { + assert(in_compilation_unit_); + in_compilation_unit_ = false; + comp_unit_base_address_ = 0; + current_source_file_ = NULL; + current_source_file_name_ = NULL; + if (address) + boundaries_.push_back(static_cast<Module::Address>(address)); + return true; +} + +bool StabsToModule::StartFunction(const string &name, + uint64_t address) { + assert(!current_function_); + Module::Function *f = new Module::Function; + f->name = Demangle(name); + f->address = address; + f->size = 0; // We compute this in StabsToModule::Finalize(). + f->parameter_size = 0; // We don't provide this information. + current_function_ = f; + boundaries_.push_back(static_cast<Module::Address>(address)); + return true; +} + +bool StabsToModule::EndFunction(uint64_t address) { + assert(current_function_); + // Functions in this compilation unit should have address bigger + // than the compilation unit's starting address. There may be a lot + // of duplicated entries for functions in the STABS data. We will + // count on the Module to remove the duplicates. + if (current_function_->address >= comp_unit_base_address_) + functions_.push_back(current_function_); + else + delete current_function_; + current_function_ = NULL; + if (address) + boundaries_.push_back(static_cast<Module::Address>(address)); + return true; +} + +bool StabsToModule::Line(uint64_t address, const char *name, int number) { + assert(current_function_); + assert(current_source_file_); + if (name != current_source_file_name_) { + current_source_file_ = module_->FindFile(name); + current_source_file_name_ = name; + } + Module::Line line; + line.address = address; + line.size = 0; // We compute this in StabsToModule::Finalize(). + line.file = current_source_file_; + line.number = number; + current_function_->lines.push_back(line); + return true; +} + +bool StabsToModule::Extern(const string &name, uint64_t address) { + Module::Extern *ext = new Module::Extern; + // Older libstdc++ demangle implementations can crash on unexpected + // input, so be careful about what gets passed in. + if (name.compare(0, 3, "__Z") == 0) { + ext->name = Demangle(name.substr(1)); + } else if (name[0] == '_') { + ext->name = name.substr(1); + } else { + ext->name = name; + } + ext->address = address; + module_->AddExtern(ext); + return true; +} + +void StabsToModule::Warning(const char *format, ...) { + va_list args; + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); +} + +void StabsToModule::Finalize() { + // Sort our boundary list, so we can search it quickly. + sort(boundaries_.begin(), boundaries_.end()); + // Sort all functions by address, just for neatness. + sort(functions_.begin(), functions_.end(), + Module::Function::CompareByAddress); + + for (vector<Module::Function *>::const_iterator func_it = functions_.begin(); + func_it != functions_.end(); + func_it++) { + Module::Function *f = *func_it; + // Compute the function f's size. + vector<Module::Address>::const_iterator boundary + = std::upper_bound(boundaries_.begin(), boundaries_.end(), f->address); + if (boundary != boundaries_.end()) + f->size = *boundary - f->address; + else + // If this is the last function in the module, and the STABS + // reader was unable to give us its ending address, then assign + // it a bogus, very large value. This will happen at most once + // per module: since we've added all functions' addresses to the + // boundary table, only one can be the last. + f->size = kFallbackSize; + + // Compute sizes for each of the function f's lines --- if it has any. + if (!f->lines.empty()) { + stable_sort(f->lines.begin(), f->lines.end(), + Module::Line::CompareByAddress); + vector<Module::Line>::iterator last_line = f->lines.end() - 1; + for (vector<Module::Line>::iterator line_it = f->lines.begin(); + line_it != last_line; line_it++) + line_it[0].size = line_it[1].address - line_it[0].address; + // Compute the size of the last line from f's end address. + last_line->size = (f->address + f->size) - last_line->address; + } + } + // Now that everything has a size, add our functions to the module, and + // dispose of our private list. + module_->AddFunctions(functions_.begin(), functions_.end()); + functions_.clear(); +} + +} // namespace google_breakpad diff --git a/3rdParty/Breakpad/src/common/stabs_to_module.h b/3rdParty/Breakpad/src/common/stabs_to_module.h new file mode 100644 index 0000000..5e04fa7 --- /dev/null +++ b/3rdParty/Breakpad/src/common/stabs_to_module.h @@ -0,0 +1,143 @@ +// -*- mode: C++ -*- + +// Copyright (c) 2010 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// dump_stabs.h: Define the StabsToModule class, which receives +// STABS debugging information from a parser and adds it to a Breakpad +// symbol file. + +#ifndef BREAKPAD_COMMON_STABS_TO_MODULE_H_ +#define BREAKPAD_COMMON_STABS_TO_MODULE_H_ + +#include <stdint.h> + +#include <string> +#include <vector> + +#include "common/module.h" +#include "common/stabs_reader.h" +#include "common/using_std_string.h" + +namespace google_breakpad { + +using std::vector; + +// A StabsToModule is a handler that receives parsed STABS debugging +// information from a StabsReader, and uses that to populate +// a Module. (All classes are in the google_breakpad namespace.) A +// Module represents the contents of a Breakpad symbol file, and knows +// how to write itself out as such. A StabsToModule thus acts as +// the bridge between STABS and Breakpad data. +// When processing Darwin Mach-O files, this also receives public linker +// symbols, like those found in system libraries. +class StabsToModule: public google_breakpad::StabsHandler { + public: + // Receive parsed debugging information from a StabsReader, and + // store it all in MODULE. + StabsToModule(Module *module) : + module_(module), + in_compilation_unit_(false), + comp_unit_base_address_(0), + current_function_(NULL), + current_source_file_(NULL), + current_source_file_name_(NULL) { } + ~StabsToModule(); + + // The standard StabsHandler virtual member functions. + bool StartCompilationUnit(const char *name, uint64_t address, + const char *build_directory); + bool EndCompilationUnit(uint64_t address); + bool StartFunction(const string &name, uint64_t address); + bool EndFunction(uint64_t address); + bool Line(uint64_t address, const char *name, int number); + bool Extern(const string &name, uint64_t address); + void Warning(const char *format, ...); + + // Do any final processing necessary to make module_ contain all the + // data provided by the STABS reader. + // + // Because STABS does not provide reliable size information for + // functions and lines, we need to make a pass over the data after + // processing all the STABS to compute those sizes. We take care of + // that here. + void Finalize(); + + private: + + // An arbitrary, but very large, size to use for functions whose + // size we can't compute properly. + static const uint64_t kFallbackSize = 0x10000000; + + // The module we're contributing debugging info to. + Module *module_; + + // The functions we've generated so far. We don't add these to + // module_ as we parse them. Instead, we wait until we've computed + // their ending address, and their lines' ending addresses. + // + // We could just stick them in module_ from the outset, but if + // module_ already contains data gathered from other debugging + // formats, that would complicate the size computation. + vector<Module::Function *> functions_; + + // Boundary addresses. STABS doesn't necessarily supply sizes for + // functions and lines, so we need to compute them ourselves by + // finding the next object. + vector<Module::Address> boundaries_; + + // True if we are currently within a compilation unit: we have gotten a + // StartCompilationUnit call, but no matching EndCompilationUnit call + // yet. We use this for sanity checks. + bool in_compilation_unit_; + + // The base address of the current compilation unit. We use this to + // recognize functions we should omit from the symbol file. (If you + // know the details of why we omit these, please patch this + // comment.) + Module::Address comp_unit_base_address_; + + // The function we're currently contributing lines to. + Module::Function *current_function_; + + // The last Module::File we got a line number in. + Module::File *current_source_file_; + + // The pointer in the .stabstr section of the name that + // current_source_file_ is built from. This allows us to quickly + // recognize when the current line is in the same file as the + // previous one (which it usually is). + const char *current_source_file_name_; +}; + +} // namespace google_breakpad + +#endif // BREAKPAD_COMMON_STABS_TO_MODULE_H_ diff --git a/3rdParty/Breakpad/src/common/string_conversion.cc b/3rdParty/Breakpad/src/common/string_conversion.cc new file mode 100644 index 0000000..27fb8cd --- /dev/null +++ b/3rdParty/Breakpad/src/common/string_conversion.cc @@ -0,0 +1,155 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <string.h> + +#include "common/convert_UTF.h" +#include "common/string_conversion.h" +#include "common/using_std_string.h" +#include "processor/scoped_ptr.h" + +namespace google_breakpad { + +using std::vector; + +void UTF8ToUTF16(const char *in, vector<u_int16_t> *out) { + size_t source_length = strlen(in); + const UTF8 *source_ptr = reinterpret_cast<const UTF8 *>(in); + const UTF8 *source_end_ptr = source_ptr + source_length; + // Erase the contents and zero fill to the expected size + out->clear(); + out->insert(out->begin(), source_length, 0); + u_int16_t *target_ptr = &(*out)[0]; + u_int16_t *target_end_ptr = target_ptr + out->capacity() * sizeof(u_int16_t); + ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr, + &target_ptr, target_end_ptr, + strictConversion); + + // Resize to be the size of the # of converted characters + NULL + out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0); +} + +int UTF8ToUTF16Char(const char *in, int in_length, u_int16_t out[2]) { + const UTF8 *source_ptr = reinterpret_cast<const UTF8 *>(in); + const UTF8 *source_end_ptr = source_ptr + sizeof(char); + u_int16_t *target_ptr = out; + u_int16_t *target_end_ptr = target_ptr + 2 * sizeof(u_int16_t); + out[0] = out[1] = 0; + + // Process one character at a time + while (1) { + ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr, + &target_ptr, target_end_ptr, + strictConversion); + + if (result == conversionOK) + return static_cast<int>(source_ptr - reinterpret_cast<const UTF8 *>(in)); + + // Add another character to the input stream and try again + source_ptr = reinterpret_cast<const UTF8 *>(in); + ++source_end_ptr; + + if (source_end_ptr > reinterpret_cast<const UTF8 *>(in) + in_length) + break; + } + + return 0; +} + +void UTF32ToUTF16(const wchar_t *in, vector<u_int16_t> *out) { + size_t source_length = wcslen(in); + const UTF32 *source_ptr = reinterpret_cast<const UTF32 *>(in); + const UTF32 *source_end_ptr = source_ptr + source_length; + // Erase the contents and zero fill to the expected size + out->clear(); + out->insert(out->begin(), source_length, 0); + u_int16_t *target_ptr = &(*out)[0]; + u_int16_t *target_end_ptr = target_ptr + out->capacity() * sizeof(u_int16_t); + ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr, + &target_ptr, target_end_ptr, + strictConversion); + + // Resize to be the size of the # of converted characters + NULL + out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0); +} + +void UTF32ToUTF16Char(wchar_t in, u_int16_t out[2]) { + const UTF32 *source_ptr = reinterpret_cast<const UTF32 *>(&in); + const UTF32 *source_end_ptr = source_ptr + 1; + u_int16_t *target_ptr = out; + u_int16_t *target_end_ptr = target_ptr + 2 * sizeof(u_int16_t); + out[0] = out[1] = 0; + ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr, + &target_ptr, target_end_ptr, + strictConversion); + + if (result != conversionOK) { + out[0] = out[1] = 0; + } +} + +static inline u_int16_t Swap(u_int16_t value) { + return (value >> 8) | (value << 8); +} + +string UTF16ToUTF8(const vector<u_int16_t> &in, bool swap) { + const UTF16 *source_ptr = &in[0]; + scoped_ptr<u_int16_t> source_buffer; + + // If we're to swap, we need to make a local copy and swap each byte pair + if (swap) { + int idx = 0; + source_buffer.reset(new u_int16_t[in.size()]); + UTF16 *source_buffer_ptr = source_buffer.get(); + for (vector<u_int16_t>::const_iterator it = in.begin(); + it != in.end(); ++it, ++idx) + source_buffer_ptr[idx] = Swap(*it); + + source_ptr = source_buffer.get(); + } + + // The maximum expansion would be 4x the size of the input string. + const UTF16 *source_end_ptr = source_ptr + in.size(); + size_t target_capacity = in.size() * 4; + scoped_array<UTF8> target_buffer(new UTF8[target_capacity]); + UTF8 *target_ptr = target_buffer.get(); + UTF8 *target_end_ptr = target_ptr + target_capacity; + ConversionResult result = ConvertUTF16toUTF8(&source_ptr, source_end_ptr, + &target_ptr, target_end_ptr, + strictConversion); + + if (result == conversionOK) { + const char *targetPtr = reinterpret_cast<const char *>(target_buffer.get()); + return targetPtr; + } + + return ""; +} + +} // namespace google_breakpad diff --git a/3rdParty/Breakpad/src/common/string_conversion.h b/3rdParty/Breakpad/src/common/string_conversion.h new file mode 100644 index 0000000..eeed4d2 --- /dev/null +++ b/3rdParty/Breakpad/src/common/string_conversion.h @@ -0,0 +1,68 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// string_conversion.h: Conversion between different UTF-8/16/32 encodings. + +#ifndef COMMON_STRING_CONVERSION_H__ +#define COMMON_STRING_CONVERSION_H__ + +#include <string> +#include <vector> + +#include "common/using_std_string.h" +#include "google_breakpad/common/breakpad_types.h" + +namespace google_breakpad { + +using std::vector; + +// Convert |in| to UTF-16 into |out|. Use platform byte ordering. If the +// conversion failed, |out| will be zero length. +void UTF8ToUTF16(const char *in, vector<u_int16_t> *out); + +// Convert at least one character (up to a maximum of |in_length|) from |in| +// to UTF-16 into |out|. Return the number of characters consumed from |in|. +// Any unused characters in |out| will be initialized to 0. No memory will +// be allocated by this routine. +int UTF8ToUTF16Char(const char *in, int in_length, u_int16_t out[2]); + +// Convert |in| to UTF-16 into |out|. Use platform byte ordering. If the +// conversion failed, |out| will be zero length. +void UTF32ToUTF16(const wchar_t *in, vector<u_int16_t> *out); + +// Convert |in| to UTF-16 into |out|. Any unused characters in |out| will be +// initialized to 0. No memory will be allocated by this routine. +void UTF32ToUTF16Char(wchar_t in, u_int16_t out[2]); + +// Convert |in| to UTF-8. If |swap| is true, swap bytes before converting. +string UTF16ToUTF8(const vector<u_int16_t> &in, bool swap); + +} // namespace google_breakpad + +#endif // COMMON_STRING_CONVERSION_H__ diff --git a/3rdParty/Breakpad/src/common/using_std_string.h b/3rdParty/Breakpad/src/common/using_std_string.h new file mode 100644 index 0000000..13c1da5 --- /dev/null +++ b/3rdParty/Breakpad/src/common/using_std_string.h @@ -0,0 +1,65 @@ +// -*- mode: C++ -*- + +// Copyright (c) 2012, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Ivan Penkov + +// using_std_string.h: Allows building this code in environments where +// global string (::string) exists. +// +// The problem: +// ------------- +// Let's say you want to build this code in an environment where a global +// string type is defined (i.e. ::string). Now, let's suppose that ::string +// is different that std::string and you'd like to have the option to easily +// choose between the two string types. Ideally you'd like to control which +// string type is chosen by simply #defining an identifier. +// +// The solution: +// ------------- +// #define HAS_GLOBAL_STRING somewhere in a global header file and then +// globally replace std::string with string. Then include this header +// file everywhere where string is used. If you want to revert back to +// using std::string, simply remove the #define (HAS_GLOBAL_STRING). + +#ifndef THIRD_PARTY_BREAKPAD_SRC_COMMON_USING_STD_STRING_H_ +#define THIRD_PARTY_BREAKPAD_SRC_COMMON_USING_STD_STRING_H_ + +#ifdef HAS_GLOBAL_STRING + typedef ::string google_breakpad_string; +#else + using std::string; + typedef std::string google_breakpad_string; +#endif + +// Inicates that type google_breakpad_string is defined +#define HAS_GOOGLE_BREAKPAD_STRING + +#endif // THIRD_PARTY_BREAKPAD_SRC_COMMON_USING_STD_STRING_H_ diff --git a/3rdParty/Breakpad/src/common/windows/guid_string.cc b/3rdParty/Breakpad/src/common/windows/guid_string.cc new file mode 100644 index 0000000..b7f877e --- /dev/null +++ b/3rdParty/Breakpad/src/common/windows/guid_string.cc @@ -0,0 +1,76 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// guid_string.cc: Convert GUIDs to strings. +// +// See guid_string.h for documentation. + +#include <wchar.h> + +#include "common/windows/string_utils-inl.h" + +#include "common/windows/guid_string.h" + +namespace google_breakpad { + +// static +wstring GUIDString::GUIDToWString(GUID *guid) { + wchar_t guid_string[37]; + swprintf( + guid_string, sizeof(guid_string) / sizeof(guid_string[0]), + L"%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", + guid->Data1, guid->Data2, guid->Data3, + guid->Data4[0], guid->Data4[1], guid->Data4[2], + guid->Data4[3], guid->Data4[4], guid->Data4[5], + guid->Data4[6], guid->Data4[7]); + + // remove when VC++7.1 is no longer supported + guid_string[sizeof(guid_string) / sizeof(guid_string[0]) - 1] = L'\0'; + + return wstring(guid_string); +} + +// static +wstring GUIDString::GUIDToSymbolServerWString(GUID *guid) { + wchar_t guid_string[33]; + swprintf( + guid_string, sizeof(guid_string) / sizeof(guid_string[0]), + L"%08X%04X%04X%02X%02X%02X%02X%02X%02X%02X%02X", + guid->Data1, guid->Data2, guid->Data3, + guid->Data4[0], guid->Data4[1], guid->Data4[2], + guid->Data4[3], guid->Data4[4], guid->Data4[5], + guid->Data4[6], guid->Data4[7]); + + // remove when VC++7.1 is no longer supported + guid_string[sizeof(guid_string) / sizeof(guid_string[0]) - 1] = L'\0'; + + return wstring(guid_string); +} + +} // namespace google_breakpad diff --git a/3rdParty/Breakpad/src/common/windows/guid_string.h b/3rdParty/Breakpad/src/common/windows/guid_string.h new file mode 100644 index 0000000..f8aa8a2 --- /dev/null +++ b/3rdParty/Breakpad/src/common/windows/guid_string.h @@ -0,0 +1,58 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// guid_string.cc: Convert GUIDs to strings. + +#ifndef COMMON_WINDOWS_GUID_STRING_H__ +#define COMMON_WINDOWS_GUID_STRING_H__ + +#include <Guiddef.h> + +#include <string> + +namespace google_breakpad { + +using std::wstring; + +class GUIDString { + public: + // Converts guid to a string in the format recommended by RFC 4122 and + // returns the string. + static wstring GUIDToWString(GUID *guid); + + // Converts guid to a string formatted as uppercase hexadecimal, with + // no separators, and returns the string. This is the format used for + // symbol server identifiers, although identifiers have an age tacked + // on to the string. + static wstring GUIDToSymbolServerWString(GUID *guid); +}; + +} // namespace google_breakpad + +#endif // COMMON_WINDOWS_GUID_STRING_H__ diff --git a/3rdParty/Breakpad/src/common/windows/string_utils-inl.h b/3rdParty/Breakpad/src/common/windows/string_utils-inl.h new file mode 100644 index 0000000..d281aaa --- /dev/null +++ b/3rdParty/Breakpad/src/common/windows/string_utils-inl.h @@ -0,0 +1,142 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// string_utils-inl.h: Safer string manipulation on Windows, supporting +// pre-MSVC8 environments. + +#ifndef COMMON_WINDOWS_STRING_UTILS_INL_H__ +#define COMMON_WINDOWS_STRING_UTILS_INL_H__ + +#include <stdarg.h> +#include <wchar.h> + +#include <string> + +// The "ll" printf format size specifier corresponding to |long long| was +// intrudced in MSVC8. Earlier versions did not provide this size specifier, +// but "I64" can be used to print 64-bit types. Don't use "I64" where "ll" +// is available, in the event of oddball systems where |long long| is not +// 64 bits wide. +#if _MSC_VER >= 1400 // MSVC 2005/8 +#define WIN_STRING_FORMAT_LL "ll" +#else // MSC_VER >= 1400 +#define WIN_STRING_FORMAT_LL "I64" +#endif // MSC_VER >= 1400 + +// A nonconforming version of swprintf, without the length argument, was +// included with the CRT prior to MSVC8. Although a conforming version was +// also available via an overload, it is not reliably chosen. _snwprintf +// behaves as a standards-confirming swprintf should, so force the use of +// _snwprintf when using older CRTs. +#if _MSC_VER < 1400 // MSVC 2005/8 +#define swprintf _snwprintf +#else +// For MSVC8 and newer, swprintf_s is the recommended method. Conveniently, +// it takes the same argument list as swprintf. +#define swprintf swprintf_s +#endif // MSC_VER < 1400 + +namespace google_breakpad { + +using std::string; +using std::wstring; + +class WindowsStringUtils { + public: + // Roughly equivalent to MSVC8's wcscpy_s, except pre-MSVC8, this does + // not fail if source is longer than destination_size. The destination + // buffer is always 0-terminated. + static void safe_wcscpy(wchar_t *destination, size_t destination_size, + const wchar_t *source); + + // Roughly equivalent to MSVC8's wcsncpy_s, except that _TRUNCATE cannot + // be passed directly, and pre-MSVC8, this will not fail if source or count + // are longer than destination_size. The destination buffer is always + // 0-terminated. + static void safe_wcsncpy(wchar_t *destination, size_t destination_size, + const wchar_t *source, size_t count); + + // Performs multi-byte to wide character conversion on C++ strings, using + // mbstowcs_s (MSVC8) or mbstowcs (pre-MSVC8). Returns false on failure, + // without setting wcs. + static bool safe_mbstowcs(const string &mbs, wstring *wcs); + + // The inverse of safe_mbstowcs. + static bool safe_wcstombs(const wstring &wcs, string *mbs); + + // Returns the base name of a file, e.g. strips off the path. + static wstring GetBaseName(const wstring &filename); + + private: + // Disallow instantiation and other object-based operations. + WindowsStringUtils(); + WindowsStringUtils(const WindowsStringUtils&); + ~WindowsStringUtils(); + void operator=(const WindowsStringUtils&); +}; + +// static +inline void WindowsStringUtils::safe_wcscpy(wchar_t *destination, + size_t destination_size, + const wchar_t *source) { +#if _MSC_VER >= 1400 // MSVC 2005/8 + wcscpy_s(destination, destination_size, source); +#else // _MSC_VER >= 1400 + // Pre-MSVC 2005/8 doesn't have wcscpy_s. Simulate it with wcsncpy. + // wcsncpy doesn't 0-terminate the destination buffer if the source string + // is longer than size. Ensure that the destination is 0-terminated. + wcsncpy(destination, source, destination_size); + if (destination && destination_size) + destination[destination_size - 1] = 0; +#endif // _MSC_VER >= 1400 +} + +// static +inline void WindowsStringUtils::safe_wcsncpy(wchar_t *destination, + size_t destination_size, + const wchar_t *source, + size_t count) { +#if _MSC_VER >= 1400 // MSVC 2005/8 + wcsncpy_s(destination, destination_size, source, count); +#else // _MSC_VER >= 1400 + // Pre-MSVC 2005/8 doesn't have wcsncpy_s. Simulate it with wcsncpy. + // wcsncpy doesn't 0-terminate the destination buffer if the source string + // is longer than size. Ensure that the destination is 0-terminated. + if (destination_size < count) + count = destination_size; + + wcsncpy(destination, source, count); + if (destination && count) + destination[count - 1] = 0; +#endif // _MSC_VER >= 1400 +} + +} // namespace google_breakpad + +#endif // COMMON_WINDOWS_STRING_UTILS_INL_H__ |