// -*- mode: c++ -*-

// Copyright (c) 2010 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// The DwarfLineToModule class accepts line number information from a
// DWARF parser and adds it to a google_breakpad::Module. The Module
// can write that data out as a Breakpad symbol file.

#ifndef COMMON_LINUX_DWARF_LINE_TO_MODULE_H
#define COMMON_LINUX_DWARF_LINE_TO_MODULE_H

#include <string>

#include "common/module.h"
#include "common/dwarf/dwarf2reader.h"
#include "common/using_std_string.h"

namespace google_breakpad {

// A class for producing a vector of google_breakpad::Module::Line
// instances from parsed DWARF line number data.  
//
// An instance of this class can be provided as a handler to a
// dwarf2reader::LineInfo DWARF line number information parser. The
// handler accepts source location information from the parser and
// uses it to produce a vector of google_breakpad::Module::Line
// objects, referring to google_breakpad::Module::File objects added
// to a particular google_breakpad::Module.
//
// GNU toolchain omitted sections support:
// ======================================
//
// Given the right options, the GNU toolchain will omit unreferenced
// functions from the final executable. Unfortunately, when it does so, it
// does not remove the associated portions of the DWARF line number
// program; instead, the DW_LNE_set_address instructions that referred to
// the now-deleted code are left with an address of zero. Given this input,
// the DWARF line parser will call AddLine with a series of lines starting
// at address zero. For example, here is the output from 'readelf -wl' for
// a program with four functions, the first three of which have been
// omitted:
//
//   Line Number Statements:
//    Extended opcode 2: set Address to 0x0
//    Advance Line by 14 to 15
//    Copy
//    Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 16
//    Special opcode 119: advance Address by 8 to 0xb and Line by 2 to 18
//    Advance PC by 2 to 0xd
//    Extended opcode 1: End of Sequence
// 
//    Extended opcode 2: set Address to 0x0
//    Advance Line by 14 to 15
//    Copy
//    Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 16
//    Special opcode 119: advance Address by 8 to 0xb and Line by 2 to 18
//    Advance PC by 2 to 0xd
//    Extended opcode 1: End of Sequence
// 
//    Extended opcode 2: set Address to 0x0
//    Advance Line by 19 to 20
//    Copy
//    Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 21
//    Special opcode 76: advance Address by 5 to 0x8 and Line by 1 to 22
//    Advance PC by 2 to 0xa
//    Extended opcode 1: End of Sequence
// 
//    Extended opcode 2: set Address to 0x80483a4
//    Advance Line by 23 to 24
//    Copy
//    Special opcode 202: advance Address by 14 to 0x80483b2 and Line by 1 to 25
//    Special opcode 76: advance Address by 5 to 0x80483b7 and Line by 1 to 26
//    Advance PC by 6 to 0x80483bd
//    Extended opcode 1: End of Sequence
//
// Instead of collecting runs of lines describing code that is not there,
// we try to recognize and drop them. Since the linker doesn't explicitly
// distinguish references to dropped sections from genuine references to
// code at address zero, we must use a heuristic. We have chosen:
//
// - If a line starts at address zero, omit it. (On the platforms
//   breakpad targets, it is extremely unlikely that there will be code
//   at address zero.)
//
// - If a line starts immediately after an omitted line, omit it too.
class DwarfLineToModule: public dwarf2reader::LineInfoHandler {
 public:
  // As the DWARF line info parser passes us line records, add source
  // files to MODULE, and add all lines to the end of LINES. LINES
  // need not be empty. If the parser hands us a zero-length line, we
  // omit it. If the parser hands us a line that extends beyond the
  // end of the address space, we clip it. It's up to our client to
  // sort out which lines belong to which functions; we don't add them
  // to any particular function in MODULE ourselves.
  DwarfLineToModule(Module *module, const string& compilation_dir,
                    vector<Module::Line> *lines)
      : module_(module),
        compilation_dir_(compilation_dir),
        lines_(lines),
        highest_file_number_(-1),
        omitted_line_end_(0),
        warned_bad_file_number_(false),
        warned_bad_directory_number_(false) { }
  
  ~DwarfLineToModule() { }

  void DefineDir(const string &name, uint32 dir_num);
  void DefineFile(const string &name, int32 file_num,
                  uint32 dir_num, uint64 mod_time,
                  uint64 length);
  void AddLine(uint64 address, uint64 length,
               uint32 file_num, uint32 line_num, uint32 column_num);

 private:

  typedef std::map<uint32, string> DirectoryTable;
  typedef std::map<uint32, Module::File *> FileTable;

  // The module we're contributing debugging info to. Owned by our
  // client.
  Module *module_;

  // The compilation directory for the current compilation unit whose
  // lines are being accumulated.
  string compilation_dir_;

  // The vector of lines we're accumulating. Owned by our client.
  //
  // In a Module, as in a breakpad symbol file, lines belong to
  // specific functions, but DWARF simply assigns lines to addresses;
  // one must infer the line/function relationship using the
  // functions' beginning and ending addresses. So we can't add these
  // to the appropriate function from module_ until we've read the
  // function info as well. Instead, we accumulate lines here, and let
  // whoever constructed this sort it all out.
  vector<Module::Line> *lines_;

  // A table mapping directory numbers to paths.
  DirectoryTable directories_;

  // A table mapping file numbers to Module::File pointers.
  FileTable files_;

  // The highest file number we've seen so far, or -1 if we've seen
  // none.  Used for dynamically defined file numbers.
  int32 highest_file_number_;
  
  // This is the ending address of the last line we omitted, or zero if we
  // didn't omit the previous line. It is zero before we have received any
  // AddLine calls.
  uint64 omitted_line_end_;

  // True if we've warned about:
  bool warned_bad_file_number_; // bad file numbers
  bool warned_bad_directory_number_; // bad directory numbers
};

} // namespace google_breakpad

#endif // COMMON_LINUX_DWARF_LINE_TO_MODULE_H