diff --git a/LICENSE b/LICENSE index 511feeb..aa2b4ca 100644 --- a/LICENSE +++ b/LICENSE @@ -298,6 +298,8 @@ This product includes code from include-what-you-use. * IWYU driver utility: * build_support/iwyu/iwyu_tool.py +* IWYU include rewrite utility: + * build_support/fix_includes.py Copyright: 2003-2010 University of Illinois at Urbana-Champaign. License: University of Illinois/NCSA Open Source License. diff --git a/build_support/fix_includes.py b/build_support/fix_includes.py new file mode 100644 index 0000000..534a9d1 --- /dev/null +++ b/build_support/fix_includes.py @@ -0,0 +1,2498 @@ +#!/usr/bin/env python3 + +##===--- fix_includes.py - rewrite source files based on iwyu output ------===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +from __future__ import print_function + +"""Update files with the 'correct' #include and forward-declare lines. + +Given the output of include_what_you_use on stdin -- when run at the +(default) --v=1 verbosity level or higher -- modify the files +mentioned in the output, removing their old #include lines and +replacing them with the lines given by the include_what_you_use +script. + +This script runs in four stages. In the first, it groups physical +lines together to form 'move spans'. A 'move span' is the atomic unit +for moving or deleting code. A move span is either a) an #include +line, along with any comment lines immediately preceding it; b) a +forward-declare line -- or more if it's a multi-line forward declare +-- along with preceding comments; c) any other single line. Example: + + // I really am glad I'm forward-declaring this class! + // If I didn't, I'd have to #include the entire world. + template + class MyClass; + +Then, it groups move spans together into 'reorder spans'. 
These are +spans of code that consist entirely of #includes and forward-declares, +maybe separated by blank lines and comments. We assume that we can +arbitrarily reorder #includes and forward-declares within a reorder +span, without affecting correctness. Things like #ifdefs, #defines, +namespace declarations, static variable declarations, class +definitions, etc -- just about anything -- break up reorder spans. + +In stage 3 it deletes all #include and forward-declare lines that iwyu +says to delete. iwyu includes line numbers for deletion, making this +part easy. If this step results in "empty" #ifdefs or namespaces +(#ifdefs or namespaces with no code inside them), we delete those as +well. We recalculate the reorder spans, which may have gotten bigger +due to the deleted code. + +In stage 4 it adds new iwyu-dictated #includes and forward-declares +after the last existing #includes and forward-declares. Then it +reorders the #includes and forward-declares to match the order +specified by iwyu. It follows iwyu's instructions as much as +possible, modulo the constraint that an #include or forward-declare +cannot leave its current reorder span. + +All this moving messes up the blank lines, which we then need to fix +up. Then we're done! +""" + +__author__ = 'csilvers@google.com (Craig Silverstein)' + +import difflib +import argparse +import os +import re +import sys +from collections import OrderedDict + +_EPILOG = """\ +Reads the output from include-what-you-use on stdin -- run with --v=1 (default) +verbosity level or above -- and, unless --sort_only or --dry_run is specified, +modifies the files mentioned in the output, removing their old #include lines +and replacing them with the lines given by include-what-you-use. It also sorts +the #include and forward-declare lines. + +All files mentioned in include-what-you-use output are modified, unless +filenames are specified on the commandline, in which case only those files are +modified. 
+ +The exit code is non-zero if a critical error occurs, otherwise zero. +""" + +_COMMENT_RE = re.compile(r'\s*//.*') + +# These are the types of lines a file can have. These are matched +# using re.match(), so don't need a leading ^. +_C_COMMENT_START_RE = re.compile(r'\s*/\*') +_C_COMMENT_END_RE = re.compile(r'.*\*/\s*(.*)$') +_COMMENT_LINE_RE = re.compile(r'\s*//') +_PRAGMA_ONCE_LINE_RE = re.compile(r'\s*#\s*pragma\s+once') +_PRAGMA_PUSH_LINE_RE = re.compile(r'\s*#\s*pragma.*push.*') +_PRAGMA_POP_LINE_RE = re.compile(r'\s*#\s*pragma.*pop.*') +_BLANK_LINE_RE = re.compile(r'\s*$') +_IF_RE = re.compile(r'\s*#\s*if') # compiles #if/ifdef/ifndef +_ELSE_RE = re.compile(r'\s*#\s*(else|elif)\b') # compiles #else/elif +_ENDIF_RE = re.compile(r'\s*#\s*endif\b') +# This is used to delete 'empty' namespaces after fwd-decls are removed. +# Some third-party libraries use macros to start/end namespaces. +_NAMESPACE_START_RE = re.compile(r'\s*(namespace\b[^{]*{\s*)+(//.*)?$|' + r'\s*(U_NAMESPACE_BEGIN)|' + r'\s*(HASH_NAMESPACE_DECLARATION_START)') +# Also detect Allman and mixed style namespaces. Use a continue regex for +# validation and to correctly set the line info. +_NAMESPACE_START_ALLMAN_RE = re.compile(r'\s*(namespace\b[^{=]*)+(//.*)?$') +_NAMESPACE_START_MIXED_RE = re.compile( + r'\s*(namespace\b[^{]*{\s*)+(namespace\b[^{]*)+(//.*)?$') +_NAMESPACE_CONTINUE_ALLMAN_MIXED_RE = re.compile(r'\s*{\s*(//.*)?$') +_NAMESPACE_END_RE = re.compile(r'\s*(})|' + r'\s*(U_NAMESPACE_END)|' + r'\s*(HASH_NAMESPACE_DECLARATION_END)') +# The group (in parens) holds the unique 'key' identifying this #include. +_INCLUDE_RE = re.compile(r'\s*#\s*include\s+([<"][^">]+[>"])') +# We don't need this to actually match forward-declare lines (we get +# that information from the iwyu input), but we do need an RE here to +# serve as an index to _LINE_TYPES. So we use an RE that never matches. 
+_FORWARD_DECLARE_RE = re.compile(r'$.FORWARD_DECLARE_RE') +# Likewise, used to mark an '#ifdef' line of a header guard, or other +# #ifdef that covers an entire file. +_HEADER_GUARD_RE = re.compile(r'$.HEADER_GUARD_RE') +# Marks the '#define' line that comes after a header guard. Since we +# know the previous line was a header-guard line, we're not that picky +# about this one. +_HEADER_GUARD_DEFINE_RE = re.compile(r'\s*#\s*define\s+') +# Pragma to mark the associated header (for use when it cannot be deduced from +# the filename) +_IWYU_PRAGMA_ASSOCIATED_RE = re.compile(r'IWYU\s*pragma:\s*associated') + +# We annotate every line in the source file by the re it matches, or None. +# Note that not all of the above RE's are represented here; for instance, +# we fold _C_COMMENT_START_RE and _C_COMMENT_END_RE into _COMMENT_LINE_RE. +# The _NAMESPACE_CONTINUE_ALLMAN_MIXED_RE is also set on lines when Allman +# and mixed namespaces are detected but the RE is too easy to match to add +# under normal circumstances (must always be preceded by Allman/mixed). +_LINE_TYPES = [_COMMENT_LINE_RE, _BLANK_LINE_RE, + _NAMESPACE_START_RE, _NAMESPACE_START_ALLMAN_RE, + _NAMESPACE_START_MIXED_RE, _NAMESPACE_END_RE, + _IF_RE, _ELSE_RE, _ENDIF_RE, + _INCLUDE_RE, _FORWARD_DECLARE_RE, + _HEADER_GUARD_RE, _HEADER_GUARD_DEFINE_RE, + _PRAGMA_ONCE_LINE_RE, + _PRAGMA_PUSH_LINE_RE, _PRAGMA_POP_LINE_RE, + ] + +# A regexp matching #include lines that should be a barrier for +# sorting -- that is, we should never reorganize the code so an +# #include that used to come before this line now comes after, or vice +# versa. This can be used for 'fragile' #includes that require other +# #includes to happen before them to function properly. +# (Note that the barrier has no effect on where new #includes are +# added; it just affects the reordering of existing #includes.) 
+_BARRIER_INCLUDES = re.compile(r'^\s*#\s*include\s+(s) to the + # full line as given by iwyu, which includes comments that iwyu + # has put next to the #include. This holds both 'to-add' and + # 'to-keep' #includes. If flags.comments is False, the comments + # are removed before adding to this list. + self.full_include_lines = OrderedDict() + + def Merge(self, other): + """Merges other with this one. They must share a filename. + + This function is intended to be used when we see two iwyu records + in the input, both for the same file. We can merge the two together. + We are conservative: we union the lines to add, and intersect the + lines to delete. + + Arguments: + other: an IWYUOutputRecord to merge into this one. + It must have the same value for filename that self does. + """ + assert self.filename == other.filename, "Can't merge distinct files" + self.lines_to_delete.intersection_update(other.lines_to_delete) + self.some_include_lines.update(other.some_include_lines) + self.seen_forward_declare_lines.update(other.seen_forward_declare_lines) + self.nested_forward_declare_lines.update(other.nested_forward_declare_lines) + self.includes_and_forward_declares_to_add.update( + other.includes_and_forward_declares_to_add) + self.full_include_lines.update(other.full_include_lines) + + def HasContentfulChanges(self): + """Returns true iff this record has at least one add or delete.""" + return (self.includes_and_forward_declares_to_add or + self.lines_to_delete) + + def __str__(self): + return ('--- iwyu record ---\n FILENAME: %s\n LINES TO DELETE: %s\n' + ' (SOME) INCLUDE LINES: %s\n (SOME) FWD-DECL LINES: %s\n' + ' TO ADD: %s\n ALL INCLUDES: %s\n---\n' + % (self.filename, self.lines_to_delete, + self.some_include_lines, self.seen_forward_declare_lines, + self.includes_and_forward_declares_to_add, + self.full_include_lines)) + + +class IWYUOutputParser(object): + """Parses the lines in iwyu output corresponding to one source file.""" + + # iwyu adds this comment to 
class IWYUOutputParser(object):
    """Parses the lines in iwyu output corresponding to one source file.

    The parser is a small state machine driven by the section-header
    lines of one iwyu record: either 'add' -> 'remove' -> 'total' ->
    'end', or a single 'no_edits' line.  _ProcessOneLine() advances the
    state and collects the lines belonging to each section;
    ParseOneRecord() converts what was collected into an
    IWYUOutputRecord.
    """

    # iwyu adds this comment to some lines to map them to the source file.
    _LINE_NUMBERS_COMMENT_RE = re.compile(r'\s*// lines ([0-9]+)-([0-9]+)')

    # The output of include-what-you-use has sections that indicate what
    # #includes and forward-declares should be added to the output file,
    # what should be removed, and what the end result is.  The first line
    # of each section also has the filename.
    _ADD_SECTION_RE = re.compile(r'^(.*) should add these lines:$')
    _REMOVE_SECTION_RE = re.compile(r'^(.*) should remove these lines:$')
    _TOTAL_SECTION_RE = re.compile(r'^The full include-list for (.*):$')
    _SECTION_END_RE = re.compile(r'^---$')

    # Alternately, if a file does not need any iwyu modifications (though
    # it still may need its #includes sorted), iwyu will emit this:
    _NO_EDITS_RE = re.compile(r'^\((.*) has correct #includes/fwd-decls\)$')

    # Human-readable section names, used only in error messages.
    _RE_TO_NAME = {_ADD_SECTION_RE: 'add',
                   _REMOVE_SECTION_RE: 'remove',
                   _TOTAL_SECTION_RE: 'total',
                   _SECTION_END_RE: 'end',
                   _NO_EDITS_RE: 'no_edits',
                   }
    # A small state-transition machine.  key==None indicates the start
    # state.  value==None means that the key is an end state (that is,
    # its presence indicates the record is finished).
    _EXPECTED_NEXT_RE = {
        None: frozenset([_ADD_SECTION_RE, _NO_EDITS_RE]),
        _ADD_SECTION_RE: frozenset([_REMOVE_SECTION_RE]),
        _REMOVE_SECTION_RE: frozenset([_TOTAL_SECTION_RE]),
        _TOTAL_SECTION_RE: frozenset([_SECTION_END_RE]),
        _SECTION_END_RE: None,
        _NO_EDITS_RE: None,
    }

    def __init__(self):
        # This is set to one of the 'section' REs above.  None is the
        # start-state.
        self.current_section = None
        self.filename = ''
        self.lines_by_section = {}  # key is an RE, value is a list of lines

    def _ProcessOneLine(self, line, basedir=None):
        """Reads one line of input, updates self, and returns False at EORecord.

        If the line matches one of the hard-coded section names, updates
        self.filename and self.current_section.  Otherwise, the line is
        taken to be a member of the currently active section, and is added
        to self.lines_by_section.

        Arguments:
          line: one line from the iwyu input file.
          basedir: passed to NormalizeFilePath() together with the
            filename captured from a section header.

        Returns:
          False if the line finishes the record (the '---' marker, or a
          'has correct #includes/fwd-decls' line), True otherwise.

        Raises:
          FixIncludesError: if there is an out-of-order section or
            mismatched filename.
        """
        line = line.rstrip()  # don't worry about line endings
        if not line:  # just ignore blank lines
            return True

        for (section_re, section_name) in self._RE_TO_NAME.items():
            m = section_re.search(line)
            if m:
                # Check or set the filename (if the re has a group, it's for
                # filename).
                if section_re.groups >= 1:
                    this_filename = NormalizeFilePath(basedir, m.group(1))

                    # Every header inside one record must name the same file.
                    if (self.current_section is not None and
                        this_filename != self.filename):
                        raise FixIncludesError(
                            '"%s" section for %s comes after "%s" for %s'
                            % (section_name, this_filename,
                               self._RE_TO_NAME[self.current_section],
                               self.filename))
                    self.filename = this_filename

                # Check and set the new section we're entering.
                if section_re not in self._EXPECTED_NEXT_RE[self.current_section]:
                    if self.current_section is None:
                        raise FixIncludesError(
                            '%s: "%s" section unexpectedly comes first'
                            % (self.filename, section_name))
                    else:
                        raise FixIncludesError(
                            '%s: "%s" section unexpectedly follows "%s"'
                            % (self.filename, section_name,
                               self._RE_TO_NAME[self.current_section]))
                self.current_section = section_re
                # We're done parsing this record if this section has nothing
                # after it.
                return self._EXPECTED_NEXT_RE[self.current_section] is not None

        # We're not starting a new section, so just add to the current section.
        # We ignore lines before section-start, they're probably things like
        # compiler messages ("Compiling file foo").
        if self.current_section is not None:
            self.lines_by_section.setdefault(self.current_section, []).append(line)
        return True

    def ParseOneRecord(self, iwyu_output, flags):
        """Given a file object with output from an iwyu run, return per file info.

        For each source file that iwyu_output mentions (because iwyu was run on
        it), we return a structure holding the information in IWYUOutputRecord:
        1) What file these changes apply to
        2) What line numbers hold includes/fwd-declares to remove
        3) What includes/fwd-declares to add
        4) Ordering information for includes and fwd-declares

        Arguments:
          iwyu_output: a File object returning lines from an iwyu run
          flags: commandline flags, as parsed by argparse.  We use
             flags.comments, which controls whether we output comments
             generated by iwyu, and flags.basedir, which is forwarded to
             _ProcessOneLine().
        Returns:
           An IWYUOutputRecord object, or None at EOF.

        Raises:
           FixIncludesError: for malformed-looking lines in the iwyu output.
        """
        for line in iwyu_output:
            if not self._ProcessOneLine(line, flags.basedir):
                # returns False at end-of-record
                break
        else:  # for/else
            # Input ran out before an end-of-record line was seen.
            return None  # at EOF

        # Now set up all the fields in an IWYUOutputRecord.
        # IWYUOutputRecord.filename
        retval = IWYUOutputRecord(self.filename)

        # IWYUOutputRecord.lines_to_delete
        for line in self.lines_by_section.get(self._REMOVE_SECTION_RE, []):
            m = self._LINE_NUMBERS_COMMENT_RE.search(line)
            if not m:
                raise FixIncludesError('line "%s" (for %s) has no line number'
                                       % (line, self.filename))
            # The RE is of the form [start_line, end_line], inclusive.
            for line_number in range(int(m.group(1)), int(m.group(2)) + 1):
                retval.lines_to_delete.add(line_number)

        # IWYUOutputRecord.some_include_lines
        # NOTE(review): the forward-declare loop below says 'remove' lines all
        # start with '- '; if so, _INCLUDE_RE.match() presumably never matches
        # a remove-section line here and only the 'total' section contributes
        # -- confirm before relying on it.
        for line in (self.lines_by_section.get(self._REMOVE_SECTION_RE, []) +
                     self.lines_by_section.get(self._TOTAL_SECTION_RE, [])):
            if not _INCLUDE_RE.match(line):
                continue
            m = self._LINE_NUMBERS_COMMENT_RE.search(line)
            if not m:
                continue  # not all #include lines have line numbers, but some do
            for line_number in range(int(m.group(1)), int(m.group(2)) + 1):
                retval.some_include_lines.add(line_number)

        # IWYUOutputRecord.seen_forward_declare_lines
        for line in (self.lines_by_section.get(self._REMOVE_SECTION_RE, []) +
                     self.lines_by_section.get(self._TOTAL_SECTION_RE, [])):
            # Everything that's not an #include is a forward-declare.
            if line.startswith('- '):  # the 'remove' lines all start with '- '.
                line = line[len('- '):]
            if _INCLUDE_RE.match(line):
                continue
            m = self._LINE_NUMBERS_COMMENT_RE.search(line)
            if m:
                # Stored half-open, [start_line, end_line + 1).
                line_range = (int(m.group(1)), int(m.group(2))+1)
                retval.seen_forward_declare_lines.add(line_range)
                if '::' in line:
                    retval.nested_forward_declare_lines.add(line_range)

        # IWYUOutputRecord.includes_and_forward_declares_to_add
        for line in self.lines_by_section.get(self._ADD_SECTION_RE, []):
            line = _COMMENT_RE.sub('', line)
            retval.includes_and_forward_declares_to_add.add(line)

        # IWYUOutputRecord.full_include_lines
        for line in self.lines_by_section.get(self._TOTAL_SECTION_RE, []):
            m = _INCLUDE_RE.match(line)
            if m:
                if not flags.comments:
                    line = _COMMENT_RE.sub('', line)  # pretend there were no comments
                else:
                    # Just remove '// lines XX-YY': that's iwyu metadata, not a
                    # real comment
                    line = self._LINE_NUMBERS_COMMENT_RE.sub('', line)
                retval.full_include_lines[m.group(1)] = line

        return retval
class LineInfo(object):
    """Information about a single line of a source file."""

    def __init__(self, line):
        """Initializes the content of the line, but no ancillary fields.

        Arguments:
          line: the text of the line, or None for the 'dummy' line 0.
        """
        # The content of the line in the input file
        self.line = line

        # The 'type' of the line.  The 'type' is one of the regular
        # expression objects in _LINE_TYPES, or None for any line that
        # does not match any regular expression in _LINE_TYPES.
        self.type = None

        # True if no lines processed before this one have the same type
        # as this line.
        self.is_first_line_of_this_type = False

        # Set to true if we want to delete/ignore this line in the output
        # (for instance, because iwyu says to delete this line).  At the
        # start, the only line to delete is the 'dummy' line 0.
        self.deleted = self.line is None

        # If this line is an #include or a forward-declare, gives a
        # [begin,end) pair saying the 'span' this line is part of.  We do
        # this for two types of span: the move span (an #include or
        # forward declare, along with any preceding comments) and the
        # reorder span (a contiguous block of move-spans, connected only
        # by blank lines and comments).  For lines that are not an
        # #include or forward-declare, these may have an arbitrary value.
        self.move_span = None
        self.reorder_span = None

        # If this line is an #include or a forward-declare, gives the
        # 'key' of the line.  For #includes it is the filename included,
        # including the ""s or <>s.  For a forward-declare it's the name
        # of the class/struct.  For other types of lines, this is None.
        self.key = None

        # If this is a forward-declaration of a nested class, then this
        # will be True.
        self.is_nested_forward_declaration = False

    def __str__(self):
        """Returns a debugging description of the line and its annotations."""
        if self.deleted:
            line = 'XX-%s-XX' % self.line
        else:
            line = '>>>%s<<<' % self.line
        if self.type is None:
            type_id = None
        else:
            try:
                type_id = _LINE_TYPES.index(self.type)
            except ValueError:
                # Some types assigned to lines are deliberately kept out of
                # _LINE_TYPES (e.g. _NAMESPACE_CONTINUE_ALLMAN_MIXED_RE);
                # describe those by their pattern instead of raising.
                type_id = self.type.pattern
        return ('%s\n -- type: %s (key: %s). move_span: %s. reorder_span: %s'
                % (line, type_id, self.key, self.move_span, self.reorder_span))


class FileInfo(object):
    """ Details about a file's storage encoding """
    DEFAULT_LINESEP = os.linesep
    DEFAULT_ENCODING = 'utf-8'

    def __init__(self, linesep, encoding):
        # Line separator to use for lines this tool adds to the file.
        self.linesep = linesep
        # Codec name used to decode and re-encode the file's bytes.
        self.encoding = encoding

    @staticmethod
    def parse(filename):
        """ Return a FileInfo object describing file encoding details.

        The whole file is read in binary mode; errors from open()/read()
        propagate to the caller.
        """
        with open(filename, 'rb') as f:
            content = f.read()

        linesep = FileInfo.guess_linesep(content)
        encoding = FileInfo.guess_encoding(content)
        return FileInfo(linesep, encoding)

    @staticmethod
    def guess_linesep(bytebuf):
        """ Return most frequent line separator of buffer.

        Falls back to DEFAULT_LINESEP when CRLF and bare-LF endings are
        equally frequent (which includes a buffer with no line endings).
        """
        win = bytebuf.count(b'\r\n')
        # Every CRLF also contains an LF, so subtract those out.
        unix = bytebuf.count(b'\n') - win
        if win > unix:
            return '\r\n'
        elif unix > win:
            return '\n'

        return FileInfo.DEFAULT_LINESEP

    @staticmethod
    def guess_encoding(bytebuf):
        """ Return approximate encoding for buffer.

        This is heavily heuristic, and will return any supported encoding that
        can describe the file without losing information, not necessarily the
        *right* encoding. This is usually OK, because IWYU typically only adds
        ASCII content (or content pulled from the file itself).

        If none of the candidate encodings can decode the buffer,
        DEFAULT_ENCODING is returned even though it cannot decode it either.
        """
        def try_decode(buf, encoding):
            try:
                buf.decode(encoding, errors='strict')
            except UnicodeError:
                return False
            return True

        # Special-case UTF-8 BOM
        if bytebuf[0:3] == b'\xef\xbb\xbf':
            if try_decode(bytebuf, 'utf-8-sig'):
                return 'utf-8-sig'

        # Tried in order; the first codec that decodes strictly wins.
        encodings = ['ascii', 'utf-8', 'windows-1250', 'windows-1252']
        for encoding in encodings:
            if try_decode(bytebuf, encoding):
                return encoding

        return FileInfo.DEFAULT_ENCODING
def _ReadFile(filename, fileinfo):
    """Read from filename and return a list of file lines.

    Arguments:
      filename: path of the file to read.
      fileinfo: object whose .encoding names the codec used to decode
        the file's bytes (typically a FileInfo).

    Returns:
      The decoded lines, each still carrying its original line ending,
      or None -- after printing a 'Skipping' message -- if the file
      cannot be read or cannot be decoded with fileinfo.encoding.
    """
    try:
        with open(filename, 'rb') as f:
            content = f.read()
        # Call splitlines with True to keep the original line
        # endings.  Later in WriteFile, they will be used as-is.
        # This will reduce spurious changes to the original files.
        # The lines we add will have the linesep determined by
        # FileInfo.
        return content.decode(fileinfo.encoding).splitlines(True)
    except (IOError, OSError, UnicodeError) as why:
        # UnicodeError: the encoding guess can be a last-resort default
        # that does not actually decode the file; skip it like any other
        # unreadable file instead of aborting the whole run.
        print("Skipping '%s': %s" % (filename, why))
        return None


def _WriteFile(filename, fileinfo, file_lines):
    """Write the given file-lines to the file.

    Arguments:
      filename: path of the file to (over)write.
      fileinfo: object whose .encoding names the codec used to encode
        the text (typically a FileInfo).
      file_lines: list of strings that already end in their line endings.

    Raises:
      UnicodeError: if the text cannot be encoded with fileinfo.encoding.
        The existing file is left untouched in that case.
    """
    # file_lines already have line endings, so join with ''.
    # Encode *before* opening: open(..., 'wb') truncates the file, so a
    # failed encode after that point would destroy its contents.
    content = ''.join(file_lines).encode(fileinfo.encoding)
    try:
        with open(filename, 'wb') as f:
            f.write(content)
    except (IOError, OSError) as why:
        print("Error writing '%s': %s" % (filename, why))


def PrintFileDiff(old_file_contents, new_file_contents):
    """Print a unified diff between files, specified as lists of lines.

    Nothing is printed when the two lists are identical.
    """
    diff = difflib.unified_diff(old_file_contents, new_file_contents)
    # skip the '--- /+++ ' lines at the start
    try:
        next(diff)
        next(diff)
    except StopIteration:
        # No differences at all: the generator is empty.
        return
    print('\n'.join(line.rstrip() for line in diff))
def _MarkHeaderGuardIfPresent(file_lines):
    """If any line in file_lines is a header-guard, mark it in file_lines.

    We define a header-guard as follows: an #ifdef where there is
    nothing contentful before or after the #ifdef.  Also, the #ifdef
    should have no #elif in it (though we don't currently test that).
    This catches the common case of an 'ifdef guard' in .h file, such
    as '#ifndef FOO_H\n#define FOO_H\n...contents...\n#endif', but it
    can also catch other whole-program #ifdefs, such as
    '#ifdef __linux\n...\n#endif'.  The issue here is that if an #ifdef
    encloses the entire file, then we are willing to put new
    #includes/fwd-declares inside the #ifdef (which normally we
    wouldn't do).  So we want to mark such #ifdefs with a special label.

    If we find such an #ifdef line -- and a single file can have at most
    one -- we change its type to a special type for header guards.

    Arguments:
      file_lines: an array of LineInfo objects with .type filled in.
    """
    # Pass over blank lines, pragmas and comments at the top of the file.
    skippable_before = [_COMMENT_LINE_RE, _BLANK_LINE_RE, _PRAGMA_ONCE_LINE_RE]
    for ifdef_start, line_info in enumerate(file_lines):
        if not line_info.deleted and line_info.type not in skippable_before:
            break
    else:  # for/else: got to EOF without finding any non-blank/comment lines
        return

    # This line is the candidate header guard-line.
    if file_lines[ifdef_start].type != _IF_RE:
        # Not a header guard, just return without doing anything.
        return

    # Find the end of this ifdef, to see if it's really a header guard.
    ifdef_depth = 0
    for ifdef_end in range(ifdef_start, len(file_lines)):
        if file_lines[ifdef_end].deleted:
            continue
        if file_lines[ifdef_end].type == _IF_RE:
            ifdef_depth += 1
        elif file_lines[ifdef_end].type == _ENDIF_RE:
            ifdef_depth -= 1
            if ifdef_depth == 0:  # The end of our #ifdef!
                break
    else:  # for/else
        return  # Weird: never found a close to this #ifdef

    # Finally, all the lines after the end of the ifdef must be blank or
    # comments.
    for line_info in file_lines[ifdef_end + 1:]:
        if (not line_info.deleted and
                line_info.type not in [_COMMENT_LINE_RE, _BLANK_LINE_RE]):
            return

    # We passed the gauntlet!
    file_lines[ifdef_start].type = _HEADER_GUARD_RE

    # And the line after the header guard #ifdef is the '#define' (usually).
    # (ifdef_start + 1 always exists: the matching #endif comes after it.)
    if _HEADER_GUARD_DEFINE_RE.match(file_lines[ifdef_start + 1].line):
        file_lines[ifdef_start + 1].type = _HEADER_GUARD_DEFINE_RE


def _CalculateLineTypesAndKeys(file_lines, iwyu_record):
    """Fills file_line's type and key fields, where the 'type' is a regexp object.

    We match each line (line_info.line) against every regexp in
    _LINE_TYPES, and assign the first that matches, or None if none
    does.  We also use iwyu_record's some_include_lines and
    seen_forward_declare_lines to identify those lines.  In fact,
    that's the only data source we use for forward-declare lines.

    Sets file_line.type and file_line.is_first_line_of_this_type for
    each file_line in file_lines.

    Arguments:
      file_lines: an array of LineInfo objects with .line fields filled in.
      iwyu_record: the IWYUOutputRecord struct for this source file.

    Raises:
      FixIncludesError: if iwyu_record's line-number information is
        inconsistent with what we see in the file.  (For instance,
        it says line 12 is an #include, but we say it's a blank line,
        or the file only has 11 lines.)
    """
    seen_types = set()
    in_c_style_comment = False
    in_allman_or_mixed_namespace = False
    for line_info in file_lines:
        if line_info.line is None:
            line_info.type = None
        elif _C_COMMENT_START_RE.match(line_info.line):
            # Note: _C_COMMENT_START_RE only matches a comment at the start
            # of a line.  Comments in the middle of a line are ignored.
            # This can cause problems with multi-line comments that start
            # in the middle of the line, but that's hopefully quite rare.
            # TODO(csilvers): check for that case.
            m = _C_COMMENT_END_RE.match(line_info.line)
            if not m:  # comment continues onto future lines
                line_info.type = _COMMENT_LINE_RE
                in_c_style_comment = True
            elif not m.group(1):  # comment extends across entire line (only)
                line_info.type = _COMMENT_LINE_RE
            else:  # comment takes only part of line, treat as content
                # TODO(csilvers): this mis-diagnoses lines like
                # '/*comment*/class Foo;'
                line_info.type = None
        elif in_c_style_comment and _C_COMMENT_END_RE.match(line_info.line):
            line_info.type = _COMMENT_LINE_RE
            in_c_style_comment = False
        elif in_c_style_comment:
            line_info.type = _COMMENT_LINE_RE
        elif (in_allman_or_mixed_namespace and
              _NAMESPACE_CONTINUE_ALLMAN_MIXED_RE.match(line_info.line)):
            in_allman_or_mixed_namespace = False
            line_info.type = _NAMESPACE_CONTINUE_ALLMAN_MIXED_RE
        else:
            for type_re in _LINE_TYPES:
                # header-guard-define-re has a two-part decision criterion: it
                # matches the RE, *and* it comes after a header guard line.
                # That's too complex to figure out now, so we skip over it now
                # and fix it up later in _MarkHeaderGuardIfPresent().
                if type_re in (_HEADER_GUARD_DEFINE_RE,):
                    continue
                m = type_re.match(line_info.line)
                if m:
                    line_info.type = type_re
                    if type_re == _INCLUDE_RE:
                        # get the 'key' for the #include.
                        line_info.key = m.group(1)
                    elif type_re in (_NAMESPACE_START_ALLMAN_RE,
                                     _NAMESPACE_START_MIXED_RE):
                        # set in_allman_or_mixed_namespace to true to find
                        # the next {
                        in_allman_or_mixed_namespace = True
                    break
            else:  # for/else
                line_info.type = None  # means we didn't match any re

        line_info.is_first_line_of_this_type = (line_info.type not in seen_types)
        seen_types.add(line_info.type)

    # Now double-check against iwyu that we got all the #include lines right.
    for line_number in iwyu_record.some_include_lines:
        # Bounds-check first, like the loops below, so a bad line number is
        # reported as a FixIncludesError rather than an IndexError.
        if line_number >= len(file_lines):
            raise FixIncludesError('iwyu line number %s:%d is past file-end'
                                   % (iwyu_record.filename, line_number))
        if file_lines[line_number].type != _INCLUDE_RE:
            raise FixIncludesError('iwyu line number %s:%d (%s) is not an #include'
                                   % (iwyu_record.filename, line_number,
                                      file_lines[line_number].line))

    # We depend entirely on the iwyu_record for the forward-declare lines.
    for (start_line, end_line) in iwyu_record.seen_forward_declare_lines:
        for line_number in range(start_line, end_line):
            if line_number >= len(file_lines):
                raise FixIncludesError('iwyu line number %s:%d is past file-end'
                                       % (iwyu_record.filename, line_number))
            file_lines[line_number].type = _FORWARD_DECLARE_RE

    for (start_line, end_line) in iwyu_record.nested_forward_declare_lines:
        for line_number in range(start_line, end_line):
            if line_number >= len(file_lines):
                raise FixIncludesError('iwyu line number %s:%d is past file-end'
                                       % (iwyu_record.filename, line_number))
            file_lines[line_number].is_nested_forward_declaration = True

    # While we're at it, let's do a bit more sanity checking on iwyu_record.
    for line_number in iwyu_record.lines_to_delete:
        if line_number >= len(file_lines):
            raise FixIncludesError('iwyu line number %s:%d is past file-end'
                                   % (iwyu_record.filename, line_number))
        elif file_lines[line_number].type not in (_INCLUDE_RE,
                                                  _FORWARD_DECLARE_RE):
            raise FixIncludesError('iwyu line number %s:%d (%s) is not'
                                   ' an #include or forward declare'
                                   % (iwyu_record.filename, line_number,
                                      file_lines[line_number].line))

    # Check if this file has a header guard, which for our purposes is
    # an #ifdef (or #if) that covers an entire source file.  Usually
    # this will be a standard .h header-guard, but it could be something
    # like '#if __linux/#endif'.  The point here is that if an #ifdef
    # encloses the entire file, then we are willing to put new
    # #includes/fwd-declares inside the #ifdef (which normally we
    # wouldn't do).  So we mark such #ifdefs with a special label.
    _MarkHeaderGuardIfPresent(file_lines)


def _PreviousNondeletedLine(file_lines, line_number):
    """Returns the line number of the previous not-deleted line, or None."""
    for candidate in range(line_number - 1, -1, -1):
        if not file_lines[candidate].deleted:
            return candidate
    return None


def _NextNondeletedLine(file_lines, line_number):
    """Returns the line number of the next not-deleted line, or None."""
    for candidate in range(line_number + 1, len(file_lines)):
        if not file_lines[candidate].deleted:
            return candidate
    return None


def _LineNumberStartingPrecedingComments(file_lines, line_number):
    """Returns the line-number for the comment-lines preceding the given linenum.

    Looking at file_lines, look at the lines immediately preceding the
    given line-number.  If they're comment lines, return the first line
    of the comment lines preceding the given line.  Otherwise, return
    the given line number.

    As a special case, if the comments go all the way up to the first
    line of the file (line 1), we assume they're a file-level header
    (such as a copyright notice), which is special -- it's not
    associated with any source code line -- and we return line_number
    in that case.

    Arguments:
      file_lines: an array of LineInfo objects, with .type fields filled in.
      line_number: an index into file_lines.

    Returns:
      The first line number of the preceding comments, or line_number
      if there are no preceding comments or they appear to be a
      top-of-file copyright notice.
    """
    retval = line_number
    while retval > 0 and file_lines[retval - 1].type == _COMMENT_LINE_RE:
        retval -= 1
    if retval <= 1:  # top-of-file comments
        retval = line_number  # so ignore all the comment lines
    return retval
def _CalculateMoveSpans(file_lines, forward_declare_spans):
    """Fills each input_line's move_span field.

    A 'move span' is a range of lines (from file_lines) that includes
    an #include or forward-declare, and all the comments preceding it.
    It is the unit we would move if we decided to move (or delete) this
    #include or forward-declare.

    For lines of type _INCLUDE_RE or _FORWARD_DECLARE_RE, the move span
    is set to the tuple [start_of_span, end_of_span).  All other lines
    have the move span kept at None.

    Arguments:
      file_lines: an array of LineInfo objects, with .type fields filled in.
      forward_declare_spans: a set of line-number pairs
        [start_line, end_line), one per forward-declare.  In practice
        this comes from iwyu_record.seen_forward_declare_lines.
    """
    # First let's do #includes.
    for line_number, line_info in enumerate(file_lines):
        if line_info.type != _INCLUDE_RE:
            continue
        span_begin = _LineNumberStartingPrecedingComments(file_lines,
                                                          line_number)
        span = (span_begin, line_number + 1)
        for i in range(*span):
            file_lines[i].move_span = span

    # Now forward-declares.  These spans come as input to this function.
    for (span_begin, span_end) in forward_declare_spans:
        span_begin = _LineNumberStartingPrecedingComments(file_lines,
                                                          span_begin)
        span = (span_begin, span_end)
        for i in range(*span):
            file_lines[i].move_span = span


def _ContainsBarrierInclude(file_lines, line_range):
    """Returns true iff some line in [line_range[0], line_range[1]) is BARRIER."""
    return any(not file_lines[line_number].deleted and
               _BARRIER_INCLUDES.search(file_lines[line_number].line)
               for line_number in range(*line_range))


def _LinesAreAllBlank(file_lines, start_line, end_line):
    """Returns true iff all lines in [start_line, end_line) are blank/deleted."""
    return all(file_lines[line_number].deleted or
               file_lines[line_number].type == _BLANK_LINE_RE
               for line_number in range(start_line, end_line))


def _CalculateReorderSpans(file_lines):
    """Fills each input_line's reorder_span field.

    A 'reorder span' is a range of lines (from file_lines) that only has
    #includes and forward-declares in it (and maybe blank lines, and
    comments associated with #includes or forward-declares).  In
    particular, it does not include any "real code" besides #includes
    and forward-declares: no functions, no static variable assignment,
    no macro #defines, no nothing.  We are willing to reorder #includes
    and namespaces freely inside a reorder span.

    Calculating reorder_span is easy: they're just the union of
    contiguous move-spans (with perhaps blank lines and comments
    thrown in), because move-spans share the 'no actual code'
    requirement.

    There's one exception: if any move-span matches the
    _BARRIER_INCLUDES regexp, it means that we should consider that
    move-span to be a 'barrier': nothing should get reordered from one
    side of that move-span to the other.  (This is used for #includes
    that depend on other #includes being before them to function
    properly.)  We do that by putting them into their own reorder span.

    For lines of type _INCLUDE_RE or _FORWARD_DECLARE_RE, the reorder
    span is set to the tuple [start_of_span, end_of_span).  All other
    lines have an arbitrary value for the reorder span.

    Arguments:
      file_lines: an array of LineInfo objects with .type and .move_span
        fields filled in.
    """
    # Happily, move_spans are disjoint.  Just make sure they're sorted and
    # unique.
    sorted_move_spans = sorted({line_info.move_span for line_info in file_lines
                                if line_info.move_span is not None})

    i = 0
    while i < len(sorted_move_spans):
        reorder_span_start = sorted_move_spans[i][0]

        # If we're a 'nosort' include, we're always in a reorder span of
        # our own.  Otherwise, add in the next move span if we're
        # connected to it only by blank lines.
        if not _ContainsBarrierInclude(file_lines, sorted_move_spans[i]):
            while i < len(sorted_move_spans) - 1:
                move_span_end = sorted_move_spans[i][1]
                next_move_span = sorted_move_spans[i + 1]
                if (_LinesAreAllBlank(file_lines, move_span_end,
                                      next_move_span[0])
                        and not _ContainsBarrierInclude(file_lines,
                                                        next_move_span)):
                    i += 1
                else:
                    break
        reorder_span = (reorder_span_start, sorted_move_spans[i][1])
        # We'll map every line in the span to the span-extent.
        for line_number in range(*reorder_span):
            file_lines[line_number].reorder_span = reorder_span
        i += 1
+ if not _ContainsBarrierInclude(file_lines, sorted_move_spans[i]): + while i < len(sorted_move_spans) - 1: + move_span_end = sorted_move_spans[i][1] + next_move_span_start = sorted_move_spans[i+1][0] + if (_LinesAreAllBlank(file_lines, move_span_end, next_move_span_start) + and not _ContainsBarrierInclude(file_lines, sorted_move_spans[i+1])): + i += 1 + else: + break + reorder_span_end = sorted_move_spans[i][1] + # We'll map every line in the span to the span-extent. + for line_number in range(reorder_span_start, reorder_span_end): + file_lines[line_number].reorder_span = (reorder_span_start, + reorder_span_end) + i += 1 + + +def ParseOneFile(f, iwyu_record): + """Given a file object, read and classify the lines of the file. + + For each file that iwyu_output mentions, we return a list of LineInfo + objects, which is a parsed version of each line, including not only + its content but its 'type', its 'key', etc. + + Arguments: + f: an iterable object returning lines from a file. + iwyu_record: the IWYUOutputRecord struct for this source file. + + Returns: + An array of LineInfo objects. The first element is always a dummy + element, so the first line of the file is at retval[1], matching + the way iwyu counts line numbers. + """ + file_lines = [LineInfo(None)] + for line in f: + file_lines.append(LineInfo(line)) + _CalculateLineTypesAndKeys(file_lines, iwyu_record) + _CalculateMoveSpans(file_lines, iwyu_record.seen_forward_declare_lines) + _CalculateReorderSpans(file_lines) + return file_lines + + +def _DeleteEmptyNamespaces(file_lines): + """Delete namespaces with nothing in them. + + Empty namespaces could be caused by transformations that removed + forward-declarations: + namespace foo { + class Myclass; + } + -> + namespace foo { + } + We want to get rid of the 'empty' namespace in this case. + + This routine 'deletes' lines by setting their 'deleted' field to True. + + Arguments: + file_lines: an array of LineInfo objects with .type fields filled in. 
+ + Returns: + The number of namespaces deleted. + """ + num_namespaces_deleted = 0 + start_line = 0 + while start_line < len(file_lines): + line_info = file_lines[start_line] + if (line_info.deleted or + (line_info.type != _NAMESPACE_START_RE and + line_info.type != _NAMESPACE_START_ALLMAN_RE and + line_info.type != _NAMESPACE_START_MIXED_RE)): + start_line += 1 + continue + if line_info.type in (_NAMESPACE_START_RE, _NAMESPACE_START_MIXED_RE): + # Because multiple namespaces can be on one line + # ("namespace foo { namespace bar { ..."), we need to count. + # We use the max because line may have 0 '{'s if it's a macro. + # TODO(csilvers): ignore { in comments. + namespace_depth = max(line_info.line.count('{'), 1) + elif line_info.type == _NAMESPACE_START_ALLMAN_RE: + # For Allman namespaces, keep the start line and increment + # the namespace depths when the actual brace is encountered. + namespace_depth = 0 + else: + # We should have handled all the namespace styles above! + assert False, ('unknown namespace type', + _LINE_TYPES.index(line_info.type)) + end_line = start_line + 1 + while end_line < len(file_lines): + line_info = file_lines[end_line] + if line_info.deleted: + end_line += 1 + elif line_info.type in (_COMMENT_LINE_RE, _BLANK_LINE_RE): + end_line += 1 # ignore blank lines + elif line_info.type == _NAMESPACE_CONTINUE_ALLMAN_MIXED_RE: + namespace_depth += 1 + end_line += 1 + elif line_info.type in (_NAMESPACE_START_RE, _NAMESPACE_START_MIXED_RE): + # nested namespace + namespace_depth += max(line_info.line.count('{'), 1) + end_line += 1 + elif line_info.type == _NAMESPACE_START_ALLMAN_RE: + # nested Allman namespace + end_line += 1 + elif line_info.type == _NAMESPACE_END_RE: + namespace_depth -= max(line_info.line.count('}'), 1) + end_line += 1 + if namespace_depth <= 0: + # Delete any comments preceding this namespace as well. + start_line = _LineNumberStartingPrecedingComments(file_lines, + start_line) + # And also blank lines. 
def _DeleteEmptyIfdefs(file_lines):
  """Deletes ifdefs with nothing in them.

  This could be caused by transformations that removed #includes:
     #ifdef OS_WINDOWS
     # include <windows.h>
     #endif
  ->
     #ifdef OS_WINDOWS
     #endif
  We want to get rid of the 'empty' #ifdef in this case.
  We also handle 'empty' #ifdefs with #else, if both sides of
  the #else are empty.  We also handle #ifndef and #if.

  This routine 'deletes' lines by setting their 'deleted' field to True.

  Arguments:
    file_lines: an array of LineInfo objects with .type fields filled in.

  Returns:
    The number of ifdefs deleted.
  """
  num_ifdefs_deleted = 0
  start_line = 0
  while start_line < len(file_lines):
    start_info = file_lines[start_line]
    # A deleted line keeps its .type, so an #if removed earlier must be
    # skipped explicitly.  Otherwise the scan below, which steps over
    # deleted lines, could pair that dead #if with a live #endif that
    # belongs to an enclosing, non-empty conditional and delete it.
    if (start_info.deleted or
        start_info.type not in (_IF_RE, _HEADER_GUARD_RE)):
      start_line += 1
      continue
    end_line = start_line + 1
    while end_line < len(file_lines):
      line_info = file_lines[end_line]
      if line_info.deleted:
        end_line += 1
      elif line_info.type in (_ELSE_RE, _COMMENT_LINE_RE, _BLANK_LINE_RE):
        end_line += 1      # ignore blank lines
      elif line_info.type == _ENDIF_RE:
        end_line += 1
        # Delete any comments preceding this #ifdef as well.
        start_line = _LineNumberStartingPrecedingComments(file_lines,
                                                          start_line)
        # And also blank lines.
        while (start_line > 0 and
               file_lines[start_line-1].type == _BLANK_LINE_RE):
          start_line -= 1
        for line_number in range(start_line, end_line):
          file_lines[line_number].deleted = True
        num_ifdefs_deleted += 1
        break
      else:   # bail: we're at a line indicating this isn't an empty ifdef
        end_line = start_line + 1  # rewind to try again with nested #ifdefs
        break
    start_line = end_line

  return num_ifdefs_deleted
+ """ + seen_lines = set() + for line_range in line_ranges: + for line_number in range(*line_range): + line_info = file_lines[line_number] + if line_info.type in (_BLANK_LINE_RE, _COMMENT_LINE_RE): + continue + if line_number != line_info.move_span[0]: + continue + span_line_numbers = range(line_info.move_span[0], line_info.move_span[1]) + line_infos_in_span = [file_lines[i] for i in span_line_numbers] + uncommented_lines = [ + _COMMENT_RE.sub('', inf.line.strip()) for inf in line_infos_in_span] + uncommented_span = ' '.join(uncommented_lines) + if uncommented_span in seen_lines: + for info in line_infos_in_span: + info.deleted = True + elif not line_info.deleted: + seen_lines.add(uncommented_span) + + +def _DeleteExtraneousBlankLines(file_lines, line_range): + """Deletes extraneous blank lines caused by line deletion. + + Here's a example file: + class Foo { ... }; + + class Bar; + + class Baz { ... } + + If we delete the "class Bar;" line, we also want to delete one of + the blank lines around it, otherwise we leave two blank lines + between Foo and Baz which looks bad. The idea is that if we have + whitespace on both sides of a deleted span of code, the whitespace + on one of the sides is 'extraneous'. In this case, we should delete + not only 'class Bar;' but also the whitespace line below it. That + leaves one blank line between Foo and Bar, like people would expect. + + We're careful to only delete the minimum of the number of blank + lines that show up on either side. If 'class Bar' had one blank + line before it, and one hundred after it, we'd only delete one blank + line when we delete 'class Bar'. This matches user's expecatations. + + The situation can get tricky when two deleted spans touch (we might + think it's safe to delete the whitespace between them when it's + not). To be safe, we only do this check when an entire reorder-span + has been deleted. 
def _ShouldInsertBlankLine(decorated_move_span, next_decorated_move_span,
                           file_lines, flags):
  """Decides whether a blank line belongs between two adjacent move spans.

  Both arguments are decorated move-spans of the form
     (reorder_range, kind, noncomment_lines, all_lines)

  When the two spans share a reorder_range, the decision depends only
  on their kinds:
    * same kind, or the next span is the EOF sentinel: no blank line;
    * both are C/C++ system includes: no blank line;
    * exactly one of them is a forward-declare: blank line;
    * otherwise: blank line iff flags.blank_lines is set.

  When the reorder_ranges differ, the first span closes its reorder
  range, and a different rule applies: we look at the first
  not-deleted line after the range, step over comment lines, and ask
  for a blank line if what we land on is contentful code (type None),
  a namespace start, or a pragma-push line.

  Arguments:
    decorated_move_span: a decorated_move_span we may want to put a blank
       line after.
    next_decorated_move_span: the next decorated_move_span, which may
       be a sentinel decorated_move_span at end-of-file.
    file_lines: an array of LineInfo objects with .deleted filled in.
    flags: commandline flags, as parsed by argparse.  We use
       flags.blank_lines, which controls whether we put blank
       lines between different 'kinds' of #includes.

  Returns:
     A true value if we should insert a blank line after
     decorated_move_span, a false value (possibly None) otherwise.
  """
  this_range = decorated_move_span[0]

  # Case 1: decorated_move_span is the last span of its reorder range.
  if this_range != next_decorated_move_span[0]:
    next_line = _NextNondeletedLine(file_lines, this_range[1] - 1)
    total_lines = len(file_lines)
    if next_line:
      # Step past comments to reach the line that really follows.
      while (next_line < total_lines and
             file_lines[next_line].type == _COMMENT_LINE_RE):
        next_line += 1
    if not next_line:
      return next_line
    if next_line >= total_lines:
      return False
    return file_lines[next_line].type in (_NAMESPACE_START_RE,
                                          _NAMESPACE_START_ALLMAN_RE,
                                          _NAMESPACE_START_MIXED_RE,
                                          _PRAGMA_PUSH_LINE_RE,
                                          None)

  # Case 2: both spans live in the same reorder range; compare kinds.
  this_kind = decorated_move_span[1]
  next_kind = next_decorated_move_span[1]

  # Identical kinds are never separated, and nothing is added at EOF.
  if this_kind == next_kind or next_kind == _EOF_KIND:
    return False

  # C and C++ system #includes always stay together, whatever the flag.
  system_kinds = [_C_SYSTEM_INCLUDE_KIND, _CXX_SYSTEM_INCLUDE_KIND]
  if this_kind in system_kinds and next_kind in system_kinds:
    return False

  # The kinds differ here, so at most one side is a forward-declare;
  # #includes and forward-declares are always kept apart.
  if _FORWARD_DECLARE_KIND in (this_kind, next_kind):
    return True

  # Any remaining change of kind is separated only on request.
  if flags.blank_lines:
    return this_kind != next_kind
  return False
def _GetNamespaceLevelReorderSpans(file_lines):
  """Returns a list of reorder-spans inside namespaces, if it's easy to do.

  This routine is meant to handle the simple case where code consists
  of includes and forward-declares, and then a 'namespace
  my_namespace'.  We return the reorder spans of the inside-namespace
  forward-declares, which is a good place to insert new
  inside-namespace forward-declares (rather than putting these new
  forward-declares at the top level).

  So it goes through the top of the file, tracking the namespaces that
  are currently open, and stops at the first 'contentful' line (type
  None).  Every time a namespace is opened, an empty span just after
  the opening line is recorded for it; when a forward-declare is later
  seen inside that namespace, the record is replaced by the
  forward-declare's own reorder span.  Lines inside a
  (non-header-guard) #if block are skipped.

  Any exception raised while scanning is caught and reported on
  stdout, and whatever was collected so far is returned.

  Arguments:
    file_lines: an array of LineInfo objects with .type and
       .reorder_span filled in.

  Returns:
    [] if we could not find any namespace-level reorder-spans, or
    [(enclosing_namespace, reorder_span), ...], where enclosing_namespace
    is a string that looks like (for instance)
    'namespace ns1 { namespace ns2 {', and reorder-span is a
    [start_line, end_line) pair.  The list is sorted in reverse order
    so that longer namespace prefixes are checked first.
  """

  def _GetNamespaceNames(namespace_line):
    """Returns a list of namespace names given a namespace line.  Anonymous
    namespaces will return an empty string
    """
    namespace_re = re.compile(r'\s*namespace\b(.*)')
    namespaces = []
    namespace_line = namespace_line.split("/")[0]  # remove C++ comments
    namespace_line = namespace_line.split("{")  # extract all namespaces
    for namespace in namespace_line:
      m = namespace_re.match(namespace)
      if m:
        namespaces.append(m.group(1).strip())

    return namespaces

  def _OpenedPrefix(name):
    """Returns the normalized text for a namespace whose '{' was seen."""
    return 'namespace %s {' % name if name else 'namespace {'

  def _PendingPrefix(name):
    """Returns the normalized text for a namespace still awaiting '{'."""
    return 'namespace %s' % name if name else 'namespace'

  namespace_reorder_spans = {}
  try:
    namespace_prefixes = []
    pending_namespace_prefix = ''
    ifdef_depth = 0

    for line_number, line_info in enumerate(file_lines):
      if line_info.deleted:
        continue

      # If we're an empty line, just ignore us.  Likewise with #include
      # lines, which aren't 'contentful' for our purposes, and the
      # header guard, which is (by definition) the only kind of #ifdef
      # that we can be inside and still considered at the "top level".
      if line_info.type in (_COMMENT_LINE_RE,
                            _BLANK_LINE_RE,
                            _INCLUDE_RE,
                            _HEADER_GUARD_RE,
                            _HEADER_GUARD_DEFINE_RE,
                            _PRAGMA_ONCE_LINE_RE):
        continue

      # Track (non-header-guard) #if nesting.  Namespaces are only
      # followed while the #if depth is 0.
      elif line_info.type == _IF_RE:
        ifdef_depth += 1

      elif line_info.type == _ELSE_RE:
        continue

      elif line_info.type == _ENDIF_RE:
        ifdef_depth -= 1

      elif ifdef_depth != 0:
        continue  # skip lines until we're outside of an ifdef block

      # From here on we build the namespace dictionary.  Opening a
      # namespace records an empty span on the following line, so that
      # namespaces without forward declarations are covered too.
      elif line_info.type == _NAMESPACE_START_RE:
        for namespace in _GetNamespaceNames(line_info.line):
          namespace_prefixes.append(_OpenedPrefix(namespace))

        namespace_reorder_spans[' '.join(namespace_prefixes)] = (
            line_number+1, line_number+1)

      elif line_info.type == _NAMESPACE_START_ALLMAN_RE:
        namespaces = _GetNamespaceNames(line_info.line)
        if len(namespaces) != 1:
          # Format the message here: exception constructors do not
          # apply %-formatting to their arguments.
          raise FixIncludesError('Allman namespace found containing multiple '
                                 'names: %s' % line_info.line)
        # The brace arrives on a later line; remember the name until then.
        pending_namespace_prefix = _PendingPrefix(namespaces[0])

      elif line_info.type == _NAMESPACE_START_MIXED_RE:
        # For mixed namespace styles, all names but the last already
        # have their brace on this line; only the final one is
        # Allman-style and waits for its brace.
        namespaces = _GetNamespaceNames(line_info.line)
        for namespace in namespaces[:-1]:
          namespace_prefixes.append(_OpenedPrefix(namespace))
        pending_namespace_prefix = _PendingPrefix(namespaces[-1])

      elif line_info.type == _NAMESPACE_CONTINUE_ALLMAN_MIXED_RE:
        # The brace that completes a pending Allman namespace.
        if pending_namespace_prefix == '':
          raise FixIncludesError('Namespace bracket found without an associated '
                                 'namespace name at line: %s' % line_number)
        pending_namespace_prefix += ' {'
        namespace_prefixes.append(pending_namespace_prefix)
        namespace_reorder_spans[' '.join(namespace_prefixes)] = (
            line_number+1, line_number+1)

      elif line_info.type == _NAMESPACE_END_RE:
        # Remove C++ comments and count the ending brackets.  A
        # namespace-end line closes at least one namespace; the floor
        # of 1 also keeps the slice from being [:-0], which would
        # discard every open prefix.
        namespace_end_count = max(
            line_info.line.split("/")[0].count("}"), 1)
        namespace_prefixes = namespace_prefixes[:-namespace_end_count]

      elif line_info.type == _FORWARD_DECLARE_RE:
        # If we're not in a namespace, keep going.  Otherwise, this is
        # just the situation we're looking for!  Update the dictionary
        # with the better reorder span
        if namespace_prefixes:
          namespace_reorder_spans[' '.join(namespace_prefixes)] = (
              line_info.reorder_span)

      elif line_info.type is None:
        break  # first contentful line: stop scanning

      else:
        # We should have handled all the cases above!
        assert False, ('unknown line-info type',
                       _LINE_TYPES.index(line_info.type))
  except Exception as why:
    # Namespace detection could be tricky so take what we have and return.
    print('DEBUG: Namespace detection returned prematurely because of an '
          'exception: %s' % (why))

  # return a reverse sorted list so longest matches are checked first
  return sorted(namespace_reorder_spans.items(), reverse=True)


# These are potential 'kind' arguments to _FirstReorderSpanWith.
_MAIN_CU_INCLUDE_KIND = 1    # e.g. #include "foo.h" when editing foo.cc
_C_SYSTEM_INCLUDE_KIND = 2   # e.g. #include <stdio.h>
_CXX_SYSTEM_INCLUDE_KIND = 3 # e.g. #include <vector>
_NONSYSTEM_INCLUDE_KIND = 4  # e.g. #include "bar.h"
_PROJECT_INCLUDE_KIND = 5    # e.g. #include "myproject/quux.h"
_FORWARD_DECLARE_KIND = 6    # e.g. class Baz;
_EOF_KIND = 7                # used at eof

# The span kinds are defined in default sort order, so generate a default
# identity mapping.
SORT_ORDER_DEFAULT = {
  kind: kind for kind in range(_MAIN_CU_INCLUDE_KIND, _EOF_KIND + 1)
}

# In quoted-first mode, we sort all quoted kinds before system kinds.
SORT_ORDER_QUOTED_FIRST = {
  _MAIN_CU_INCLUDE_KIND: 1,
  _NONSYSTEM_INCLUDE_KIND: 2,
  _PROJECT_INCLUDE_KIND: 3,
  _C_SYSTEM_INCLUDE_KIND: 4,
  _CXX_SYSTEM_INCLUDE_KIND: 5,
  _FORWARD_DECLARE_KIND: 6,
  _EOF_KIND: 7,
}

def _IsSystemInclude(line_info):
  """Given a line-info, return true iff the line is a <>-style #include."""
  # The key for #includes includes the <> or "", so this is easy. :-)
  return line_info.type == _INCLUDE_RE and line_info.key[0] == '<'
def _GetPathRoot(path):
  """Returns the first component of path, or None if it has no separator.

  The path is cut at the first os.path.sep.  Where os.path.sep is not
  '/' (i.e. on Windows), '/' is accepted as a fallback separator,
  because it helps with testing and forward slashes are common even
  on Windows in portable codebases.

  A path that starts with a separator has the empty string as its
  root.
  """
  cut = path.find(os.path.sep)
  if cut == -1 and os.path.sep != '/':
    cut = path.find('/')
  return None if cut == -1 else path[:cut]


def _IsSameProject(line_info, edited_file, project):
  """Tells whether an included file belongs to the edited file's project.

  The included file is line_info.key with its first character (the
  opening quote or angle bracket) dropped.

  If project is a non-empty string, the included file is in the
  project when it starts with that string ('project' should end
  with /).  If project is '', the project is taken to be the
  top-level directory of edited_file, and the two files match when
  both have a path root and the roots are equal.

  Arguments:
    line_info: a LineInfo structure with .key containing the file that is
      being included.
    edited_file: the name of the file being edited.
    project: if '', set the project path to be the top-level directory
      name of the file being edited.  If not '', this value is used to
      specify the project directory.

  Returns:
    A true value if line_info and edited_file belong in the same
    project, a false value otherwise.  When a path root is missing or
    empty, that root itself (None or '') is what gets returned.
  """
  included_file = line_info.key[1:]
  if project == '':
    included_root = _GetPathRoot(included_file)
    edited_root = _GetPathRoot(edited_file)
    if not included_root:
      return included_root
    if not edited_root:
      return edited_root
    return included_root == edited_root
  return included_file.startswith(project)
def _FirstReorderSpanWith(file_lines, good_reorder_spans, kind, filename,
                          flags):
    """Chooses the reorder span that a new line of the given kind should join.

    The candidates are tried in this order:
      1. The first span in good_reorder_spans that already holds a line of
         this kind.
      2. The last span holding a line of a lower-numbered kind, searching
         from kind - 1 down to _MAIN_CU_INCLUDE_KIND.
      3. The first span holding a line of a higher-numbered kind, not
         counting _FORWARD_DECLARE_KIND (an #include should not be placed
         next to a forward-declare, which may be inside a class).
      4. The first line that is not deleted, blank, a header guard, a
         '#pragma once', or a comment preceding the header guard, returned
         as an empty span.  If that line is at or past the start of the
         recorded forward-declare span, that span is returned instead.
      5. An empty span at end-of-file.

    Forward-declares located after the first 'contentful' line (the first
    line not covered by the leading run of adjacent spans) are ignored, so
    that a new forward-declare is never inserted after real code such as
        using Foo::Bar;
        class Bang;

    kind is one of the following enums, with examples:
      _MAIN_CU_INCLUDE_KIND:    #include "foo.h" when editing foo.cc
      _C_SYSTEM_INCLUDE_KIND:   #include <stdio.h>
      _CXX_SYSTEM_INCLUDE_KIND: #include <vector>
      _NONSYSTEM_INCLUDE_KIND:  #include "bar.h"
      _PROJECT_INCLUDE_KIND:    #include "myproject/quux.h"
      _FORWARD_DECLARE_KIND:    class Baz;

    Arguments:
      file_lines: an array of LineInfo objects with .type and
        .reorder_span filled in.
      good_reorder_spans: a sorted list of reorder_spans to consider
        (should not include reorder_spans inside #ifdefs or namespaces).
      kind: one of the *_KIND values listed above.
      filename: the name of the file that file_lines comes from; it is
        forwarded to _GetLineKind.
      flags: commandline flags, as parsed by argparse.  Only
        flags.separate_project_includes is read here.

    Returns:
      A pair of line numbers, [start_line, end_line), that is the 'best'
      reorder_span in file_lines for the given kind.
    """
    assert kind in (_MAIN_CU_INCLUDE_KIND, _C_SYSTEM_INCLUDE_KIND,
                    _CXX_SYSTEM_INCLUDE_KIND, _NONSYSTEM_INCLUDE_KIND,
                    _PROJECT_INCLUDE_KIND, _FORWARD_DECLARE_KIND), kind

    # The first contentful line is where the leading run of back-to-back
    # spans stops: either at the first gap between two neighbouring spans,
    # or at the end of the final span when there is no gap at all.
    first_contentful_line = 0
    if good_reorder_spans:
        first_contentful_line = good_reorder_spans[-1][1]
        for span, following in zip(good_reorder_spans, good_reorder_spans[1:]):
            if span[1] != following[0]:
                first_contentful_line = span[1]
                break

    # Record, per kind, the earliest and the latest span containing it.
    earliest_span_of = {}
    latest_span_of = {}
    for span in good_reorder_spans:
        for lineno in range(*span):
            line_kind = _GetLineKind(file_lines[lineno], filename,
                                     flags.separate_project_includes)
            if line_kind is None:
                continue
            # Forward-declares after contentful code never attract new ones.
            if (line_kind == _FORWARD_DECLARE_KIND and
                    lineno > first_contentful_line):
                continue
            earliest_span_of.setdefault(line_kind, span)
            latest_span_of[line_kind] = span

    # Choice 1: a span that already has our kind.
    if kind in earliest_span_of:
        return earliest_span_of[kind]

    # Choice 2: the last span of the nearest lower-numbered kind.
    for lower_kind in range(kind - 1, _MAIN_CU_INCLUDE_KIND - 1, -1):
        if lower_kind in latest_span_of:
            return latest_span_of[lower_kind]

    # Choice 3: the first span of the nearest higher-numbered kind,
    # stopping short of _FORWARD_DECLARE_KIND.
    for higher_kind in range(kind + 1, _FORWARD_DECLARE_KIND):
        if higher_kind in earliest_span_of:
            return earliest_span_of[higher_kind]

    # Choice 4: no usable span (or only forward-declare spans).  Walk past
    # the file preamble and insert at the first line that follows it.
    inside_header_guard = False
    lineno = 0
    while lineno < len(file_lines):
        info = file_lines[lineno]
        if info.deleted:
            lineno += 1
            continue
        if info.type == _HEADER_GUARD_RE:
            inside_header_guard = True
            lineno += 2  # skip over the header guard
            continue
        if info.type == _BLANK_LINE_RE:
            lineno += 1
            continue
        if info.type == _PRAGMA_ONCE_LINE_RE:
            inside_header_guard = True
            lineno += 1
            continue
        # Comments only count as top-of-file comments while we are still
        # before the header guard; later comments mark the insertion point.
        if info.type == _COMMENT_LINE_RE and not inside_header_guard:
            lineno += 1
            continue

        # Prefer the existing forward-declare span over opening a new span
        # that would sit inside it.
        if earliest_span_of:
            assert list(earliest_span_of.keys()) == [_FORWARD_DECLARE_KIND], \
                earliest_span_of
            forward_declare_span = earliest_span_of[_FORWARD_DECLARE_KIND]
            if lineno >= forward_declare_span[0]:
                return forward_declare_span
        return (lineno, lineno)

    # Choice 5: the whole file is preamble; insert at end-of-file.
    return (len(file_lines), len(file_lines))
def _RemoveNamespacePrefix(fwd_decl_iwyu_line, namespace_prefix):
    """Strips namespace_prefix and its closing braces from a forward-declare.

    If fwd_decl_iwyu_line is
       namespace ns1 { namespace ns2 { namespace ns3 { foo } } }
    and namespace_prefix is 'namespace ns1 { namespace ns2 {', the result is
    'namespace ns3 { foo }': the prefix is dropped from the front, and one
    trailing '}' is dropped for every '{' that the prefix contains.

    Arguments:
      fwd_decl_iwyu_line: a line from iwyu about a forward-declare to add.
      namespace_prefix: a non-empty string of the form
        'namespace ns1 { namespace ns2 { [...]'.

    Returns:
      The line without the prefix namespaces, or None when the line does
      not start with namespace_prefix or does not end with enough closing
      braces.
    """
    assert namespace_prefix, "_RemoveNamespaces requires a non-empty prefix"
    if not fwd_decl_iwyu_line.startswith(namespace_prefix):
        return None

    remainder = fwd_decl_iwyu_line[len(namespace_prefix):].lstrip()

    # One closing brace must be peeled off for each brace the prefix opened;
    # they have to be the last thing on the line.
    brace_count = namespace_prefix.count('{')
    closers = re.search(r'(\s*\}){%d}\s*$' % brace_count, remainder)
    if closers is None:
        return None
    return remainder[:closers.start(0)]
def _DecoratedMoveSpanLines(iwyu_record, file_lines, move_span_lines, flags):
    """Builds the sortable ("decorated") description of one move span.

    Three things are computed for the span:
      * its output text, as a list of strings.  For an #include, the
        #include line itself is replaced by the version stored in
        iwyu_record.full_include_lines when that version carries a comment.
      * a sort key: the contentful part of the span (no leading comments)
        with all whitespace removed and '-inl.h' rewritten to '_inl.h', so
        that foo-inl.h sorts after foo.h.
      * its kind (system include, main-cu include, forward-declare, ...),
        as reported by _GetLineKind.

    These are returned together with the reorder span the move span lives
    in.  A line that is being newly added has no reorder span yet, so one
    is chosen: a forward-declare first tries a namespace-level reorder span
    whose namespace prefix it starts with (the prefix is then stripped from
    the emitted line); everything else, and any forward-declare that found
    no such span, goes to the span picked by _FirstReorderSpanWith among
    the top-level reorder spans.

    Arguments:
      iwyu_record: the IWYUOutputRecord struct for this source file.
      file_lines: a list of LineInfo objects holding the parsed output of
        the file in iwyu_record.filename.
      move_span_lines: a list of LineInfo objects.  For lines already in
        the file this is a sub-list of file_lines; for lines being added
        it is a newly created list.
      flags: commandline flags, as parsed by argparse.  Only
        flags.separate_project_includes is read here directly.

    Returns:
      A tuple (reorder_span, kind, sort_key, all_lines_as_list), where
      all_lines_as_list is a list of strings rather than LineInfo objects.
      Returns None if the span has no undeleted #include or
      forward-declare line.
    """
    # Locate the first live #include / forward-declare in the span.
    first_contentful_line = None
    for index, info in enumerate(move_span_lines):
        if (not info.deleted and
                info.type in (_INCLUDE_RE, _FORWARD_DECLARE_RE)):
            first_contentful_line = index
            break
    if first_contentful_line is None:
        # Nothing contentful survives, so this must be a deleted span.
        return None

    firstline = move_span_lines[first_contentful_line]
    include_match = _INCLUDE_RE.match(firstline.line)
    if include_match:
        # An #include: swap in iwyu's version of the line when it carries
        # a comment, and emit it after the span's other undeleted lines.
        sort_key = firstline.line
        iwyu_version = iwyu_record.full_include_lines.get(
            include_match.group(1), '')
        if _COMMENT_LINE_RE.search(iwyu_version):
            sort_key = iwyu_version
        all_lines = [li.line for li in move_span_lines[:-1] if not li.deleted]
        all_lines.append(sort_key)
    else:
        # A forward-declare: the key is every undeleted line from the
        # first contentful one onwards, glued together.
        sort_key = ''.join(
            li.line for li in move_span_lines[first_contentful_line:]
            if not li.deleted)
        all_lines = [li.line for li in move_span_lines if not li.deleted]

    # Whitespace must not influence ordering, and foo-inl.h should sort
    # after foo.h.
    sort_key = re.sub(r'\s+', '', sort_key).replace('-inl.h', '_inl.h')

    kind = _GetLineKind(firstline, iwyu_record.filename,
                        flags.separate_project_includes)

    reorder_span = firstline.reorder_span
    if reorder_span is not None:
        # A line that was already in the file keeps its reorder span.
        return (reorder_span, kind, sort_key, all_lines)

    # From here on we are placing a line that is being newly added.
    if kind == _FORWARD_DECLARE_KIND:
        # Try to slot the declaration into an existing reorder span that
        # sits inside the same namespace(s).
        for namespace_prefix, candidate_span in (
                _GetNamespaceLevelReorderSpans(file_lines)):
            if not (namespace_prefix and candidate_span and
                    firstline.line.startswith(namespace_prefix)):
                continue
            # Inside that namespace the prefix is redundant; only accept
            # the span if the prefix can actually be stripped.
            stripped = _RemoveNamespacePrefix(firstline.line, namespace_prefix)
            if not stripped:
                continue
            assert all_lines[first_contentful_line] == firstline.line
            all_lines[first_contentful_line] = stripped
            sort_key = re.sub(r'\s+', '', stripped)
            reorder_span = candidate_span
            break

    if reorder_span is None:
        # TODO(csilvers): could make this more efficient by storing, per-kind.
        reorder_span = _FirstReorderSpanWith(
            file_lines, _GetToplevelReorderSpans(file_lines), kind,
            iwyu_record.filename, flags)

    return (reorder_span, kind, sort_key, all_lines)
def _CommonPrefixLength(a, b):
    """Given two lists, returns the index of 1st element not common to both.

    When one list is a prefix of the other (or they are equal), the length
    of the shorter list is returned.
    """
    for index, (left, right) in enumerate(zip(a, b)):
        if left != right:
            return index
    return min(len(a), len(b))


def _NormalizeNamespaceForwardDeclareLines(lines):
    """'Normalize' namespace lines in a list of output lines and return new list.

    When suggesting new forward-declares to insert, iwyu uses the following
    format, putting each class on its own line with all namespaces:
       namespace foo { namespace bar { class A; } }
       namespace foo { namespace bar { class B; } }
       namespace foo { namespace bang { class C; } }
    We convert this to 'normalized' form, which puts namespaces on their
    own line and collects classes together:
       namespace foo {
       namespace bar {
       class A;
       class B;
       } // namespace bar
       namespace bang {
       class C;
       } // namespace bang
       } // namespace foo

    Non-namespace lines are left alone.  Only adjacent namespace lines
    from the input are merged.

    Arguments:
      lines: a list of output-lines -- that is, lines that are ready to
        be emitted as-is to the output file.

    Returns:
      A new version of lines, with namespace lines normalized as above.

    Raises:
      FixIncludesError: if a line opens a namespace in iwyu format but has
        no '{ ... }' payload to extract.
    """
    # iwyu input is very regular, which is nice.
    iwyu_namespace_re = re.compile(r'namespace ([^{]*) { ')
    iwyu_classname_re = re.compile(r'{ ([^{}]*) }')

    retval = []
    current_namespaces = []
    # A trailing blank line has no namespaces, so processing it closes
    # whatever namespaces are still open at the end of the input.
    for line in lines + ['']:
        namespaces_in_line = iwyu_namespace_re.findall(line)
        # Namespaces shared with the previous line stay open; the rest of
        # the old stack is closed (innermost first) and the new tail opened.
        differ_pos = _CommonPrefixLength(namespaces_in_line, current_namespaces)
        namespaces_to_close = reversed(current_namespaces[differ_pos:])
        namespaces_to_open = namespaces_in_line[differ_pos:]
        retval.extend('} // namespace %s' % ns for ns in namespaces_to_close)
        retval.extend('namespace %s {' % ns for ns in namespaces_to_open)
        current_namespaces = namespaces_in_line
        # Now add the current line.  If we were a namespace line, it's the
        # 'class' part of the line (everything but the 'namespace {'s).
        if namespaces_in_line:
            m = iwyu_classname_re.search(line)
            if not m:
                # Format the message here: passing `line` as a second
                # constructor argument would make str(error) a tuple repr.
                raise FixIncludesError(
                    'Malformed namespace line from iwyu: %s' % line)
            retval.append(m.group(1))
        else:
            retval.append(line)

    assert retval and retval[-1] == '', 'What happened to our sentinel line?'
    return retval[:-1]
def _DeleteLinesAccordingToIwyu(iwyu_record, file_lines):
    """Marks as deleted every line iwyu_record says to drop, then tidies up.

    For each line number in iwyu_record.lines_to_delete the whole move span
    of that line is flagged as deleted.  Namespaces and #ifdefs left empty
    by this are then removed repeatedly until a pass removes nothing,
    duplicate lines in the top-level reorder spans are deleted, and
    extraneous blank lines around those spans are trimmed.

    Arguments:
      iwyu_record: the IWYUOutputRecord for the file; only
        lines_to_delete is read here.
      file_lines: a list of LineInfo objects for the file; modified
        in place.
    """
    for doomed_line in iwyu_record.lines_to_delete:
        # The move span covers the line plus its preceding comments.
        span_start, span_end = file_lines[doomed_line].move_span
        for index in range(span_start, span_end):
            file_lines[index].deleted = True

    # Removing one empty construct can empty its parent, so iterate until
    # a full pass deletes nothing.
    removed = -1
    while removed != 0:
        removed = _DeleteEmptyNamespaces(file_lines)
        removed += _DeleteEmptyIfdefs(file_lines)

    # Duplicates are only looked for in 'top-level' spans, to avoid trouble
    # such as accidentally deleting inside an #ifdef.
    toplevel_reorder_spans = _GetToplevelReorderSpans(file_lines)
    _DeleteDuplicateLines(file_lines, toplevel_reorder_spans)

    # A reorder span that was deleted wholesale may leave surplus blank
    # lines on both sides; spans inside #ifdefs and namespaces were already
    # handled above, so the top-level ones suffice.
    for span in toplevel_reorder_spans:
        _DeleteExtraneousBlankLines(file_lines, span)


def _GetSymbolNameFromForwardDeclareLine(line):
    """Extracts the (possibly qualified) symbol from a forward-declare to add.

    The symbol is the last run of [A-Za-z0-9_] characters in the line.  If
    the line wraps it in iwyu-style namespaces, the namespace names are
    joined in front of it with '::' (foo::bar::sym); an anonymous namespace
    is spelled '(anonymous namespace)'.
    """
    # Give anonymous namespaces a name so the namespace regex captures them.
    named = line.replace("namespace {", "namespace (anonymous namespace) {")
    scopes = re.findall(r'namespace ([^{]*) { ', named)
    identifiers = re.findall(r'([A-Za-z0-9_]+)', line)
    leaf = identifiers[-1]
    if not scopes:
        return leaf
    return '::'.join(scopes + [leaf])
def GetLineSortOrdinal(kind, quoted_includes_first):
    """Returns the sort position of a line kind.

    The position is looked up in SORT_ORDER_QUOTED_FIRST when
    quoted_includes_first is true, and in SORT_ORDER_DEFAULT otherwise.
    """
    ordering = (SORT_ORDER_QUOTED_FIRST if quoted_includes_first
                else SORT_ORDER_DEFAULT)
    return ordering[kind]


def FixFileLines(iwyu_record, file_lines, flags, fileinfo):
    """Applies one block of lines from the iwyu output script.

    Called once all the iwyu output pertaining to a single source file has
    been read and parsed into an iwyu_record.  The old #includes and
    forward-declares are removed, the new ones inserted, and the lot is
    reordered, all as specified by the iwyu output.

    Arguments:
      iwyu_record: an IWYUOutputRecord object holding the parsed output
        of the include-what-you-use script (run at verbose level 1 or
        higher) pertaining to a single source file.
      file_lines: a list of LineInfo objects holding the parsed output of
        the file in iwyu_record.filename.
      flags: commandline flags, as parsed by argparse.  Read directly:
        safe_headers (skips deletions when the file may be a header),
        quoted_includes_first, reorder and keep_iwyu_namespace_format.
        The flags are also passed on to other routines.
      fileinfo: FileInfo for the current file; its linesep terminates
        every line emitted for a reorder span.

    Returns:
      An array of 'fixed' source code lines, after modifications as
      specified by iwyu.
    """
    # Deleting is easy since iwyu tells us the line numbers.  In
    # safe-headers mode a possible header file keeps all of its lines.
    keep_all_lines = (flags.safe_headers and
                      _MayBeHeaderFile(iwyu_record.filename))
    if not keep_all_lines:
        _DeleteLinesAccordingToIwyu(iwyu_record, file_lines)

    # The deletions may have let neighbouring reorder spans merge.
    _CalculateReorderSpans(file_lines)

    # Decorate every move span already in the file so it can be sorted.
    decorated = []
    existing_spans = OrderedSet(
        [info.move_span for info in file_lines if info.move_span])
    for (begin, end) in existing_spans:
        entry = _DecoratedMoveSpanLines(iwyu_record, file_lines,
                                        file_lines[begin:end], flags)
        if entry:
            decorated.append(entry)

    # Decorate the #includes and forward-declares that iwyu wants added.
    seen_symbols = set()
    for new_line in iwyu_record.includes_and_forward_declares_to_add:
        new_info = LineInfo(new_line)
        include_match = _INCLUDE_RE.match(new_line)
        if include_match:
            new_info.type = _INCLUDE_RE
            new_info.key = include_match.group(1)
        else:
            # Different iwyu analyses of this file can suggest the same
            # symbol with different template args; keep only the first.
            symbol = _GetSymbolNameFromForwardDeclareLine(new_line)
            if symbol in seen_symbols:
                continue
            seen_symbols.add(symbol)
            new_info.type = _FORWARD_DECLARE_RE
        entry = _DecoratedMoveSpanLines(iwyu_record, file_lines,
                                        [new_info], flags)
        assert entry, 'line to add is not an #include or fwd-decl?'
        decorated.append(entry)

    # An empty sentinel span at end-of-file lets the output loop below
    # copy the tail of the file like any other gap before a span.
    eof = len(file_lines)
    decorated.append(((eof, eof), _EOF_KIND, '', []))

    def _SortKey(entry):
        span, kind, text, lines = entry
        ordinal = GetLineSortOrdinal(kind, flags.quoted_includes_first)
        if flags.reorder:
            return span, ordinal, text, lines
        # Without --reorder, lines of the same kind inside a span compare
        # equal, so the stable sort keeps them in their current order.
        return span, ordinal

    decorated.sort(key=_SortKey)

    # Build the output: lines outside reorder spans are copied verbatim
    # (minus deleted ones); each reorder span is replaced by its sorted
    # content, with blank lines introduced where appropriate.
    output_lines = []
    cursor = 0  # index of the next decorated entry not yet emitted
    line_number = 0
    while line_number < len(file_lines):
        current_span = decorated[cursor][0]

        # Copy everything up to the start of the span.
        while line_number < current_span[0]:
            if not file_lines[line_number].deleted:
                output_lines.append(file_lines[line_number].line)
            line_number += 1

        # Gather the content of every entry that belongs to this span.
        span_lines = []
        while (cursor < len(decorated) and
               decorated[cursor][0] == current_span):
            span_lines.extend(decorated[cursor][3])  # the full content
            if (cursor + 1 < len(decorated) and
                    _ShouldInsertBlankLine(decorated[cursor],
                                           decorated[cursor + 1],
                                           file_lines, flags)):
                span_lines.append('')
            cursor += 1

        if not flags.keep_iwyu_namespace_format:
            # Convert namespace lines from the iwyu input format to the
            # 'official style' format:
            #   'namespace foo { class Bar; }\n' -> 'namespace foo {\nclass Bar;\n}'
            # along with collecting multiple classes in the same namespace.
            span_lines = _NormalizeNamespaceForwardDeclareLines(span_lines)

        # Add line separators to the new lines.
        terminated = [text.rstrip() + fileinfo.linesep for text in span_lines]
        output_lines.extend(terminated)
        line_number = current_span[1]  # go to end of span

    return [line for line in output_lines if line is not None]
def FixOneFile(iwyu_record, file_contents, flags, fileinfo):
    """Parses a file and applies the IWYU fixes described by iwyu_record.

    Arguments:
      iwyu_record: the IWYUOutputRecord for the file.
      file_contents: the file's contents, as accepted by ParseOneFile.
      flags: commandline flags, as parsed by argparse.
      fileinfo: FileInfo for the file.

    Returns:
      Two lists of lines (old, fixed): the lines as parsed, and the lines
      produced by FixFileLines.
    """
    parsed_lines = ParseOneFile(file_contents, iwyu_record)

    # Snapshot the original text before FixFileLines gets to work on the
    # parsed lines.
    before = []
    for info in parsed_lines:
        if info is None or info.line is None:
            continue
        before.append(info.line)

    after = FixFileLines(iwyu_record, parsed_lines, flags, fileinfo)
    return before, after
def FixManyFiles(iwyu_records, flags):
    """Given a list of iwyu_records, fix each file listed in the record.

    For each iwyu record in the input, which lists the #includes and
    forward-declares to add, remove, and re-sort, loads the file, makes
    the fixes, and writes the fixed file to disk (or, with flags.dry_run,
    prints a diff instead).  A file whose contents come back empty is
    skipped, and a FixIncludesError raised while handling a file is
    reported and that file skipped.

    Arguments:
      iwyu_records: a collection of IWYUOutputRecord objects holding
        the parsed output of the include-what-you-use script (run at
        verbose level 1 or higher) pertaining to a single source file.
        iwyu_record.filename indicates what file to edit.
      flags: commandline flags, as parsed by argparse.

    Returns:
      The number of files fixed (as opposed to ones that needed no fixing).
    """
    fixed_count = 0
    for record in iwyu_records:
        try:
            fileinfo = FileInfo.parse(record.filename)
            contents = _ReadFile(record.filename, fileinfo)
            if contents:
                print(">>> Fixing #includes in '%s'" % record.filename)
                before, after = FixOneFile(record, contents, flags, fileinfo)
                if before == after:
                    print("No changes in file %s" % record.filename)
                else:
                    if flags.dry_run:
                        PrintFileDiff(before, after)
                    else:
                        _WriteFile(record.filename, fileinfo, after)
                    fixed_count += 1
        except FixIncludesError as why:
            print('ERROR: %s - skipping file %s' % (why, record.filename))

    print('IWYU edited %d files on your behalf.\n' % fixed_count)
    return fixed_count


def ProcessIWYUOutput(f, files_to_process, flags, cwd):
    """Fix the #include and forward-declare lines as directed by f.

    Given a file object that has the output of the include_what_you_use
    script, see every file to be edited and edit it, if appropriate.

    Arguments:
      f: an iterable object that is the output of include_what_you_use.
      files_to_process: A set of filenames, or None.  If not None, we
        ignore files mentioned in f that are not in files_to_process.
      flags: commandline flags, as parsed by argparse.  Read directly:
        basedir, ignore_re, only_re and update_comments; the flags are
        also passed to other routines.
      cwd: the current working directory, externalized for testing.  It is
        used to normalize the names in files_to_process.

    Returns:
      The number of files that had to be modified (because they weren't
      already all correct).  In dry_run mode, returns the number of
      files that would have been modified.
    """
    if files_to_process is not None:
        files_to_process = [NormalizeFilePath(cwd, name)
                            for name in files_to_process]

    # Collect every record from the iwyu output.  An OrderedDict keeps the
    # files in the order in which iwyu first mentioned them.
    records_by_file = OrderedDict()
    while True:
        parser = IWYUOutputParser()
        try:
            record = parser.ParseOneRecord(f, flags)
            if not record:
                break
        except FixIncludesError as why:
            print('ERROR: %s' % why)
            continue

        filename = NormalizeFilePath(flags.basedir, record.filename)

        skip_message = None
        if files_to_process is not None and filename not in files_to_process:
            skip_message = '(skipping %s: not listed on commandline)' % filename
        elif flags.ignore_re and re.search(flags.ignore_re, filename):
            skip_message = '(skipping %s: it matches --ignore_re, which is %s)' % (
                filename, flags.ignore_re)
        elif flags.only_re and not re.search(flags.only_re, filename):
            skip_message = (
                '(skipping %s: it does not match --only_re, which is %s)' % (
                    filename, flags.only_re))
        if skip_message is not None:
            print(skip_message)
            continue

        # The same file can be reported more than once; fold the reports
        # into a single record.
        if filename not in records_by_file:
            records_by_file[filename] = record
        else:
            records_by_file[filename].Merge(record)

    # Drop the files that never had any contentful changes.  This has to
    # wait until everything is merged, since a .h file may have a
    # contentful change when #included from one .cc file but not another.
    contentful_records = []
    for filename, record in records_by_file.items():
        if not flags.update_comments and not record.HasContentfulChanges():
            print('(skipping %s: iwyu reports no contentful changes)' % filename)
            continue
        if record:
            contentful_records.append(record)

    # Now do all the fixing, and return the number of files modified.
    return FixManyFiles(contentful_records, flags)
def NormalizeFilePath(basedir, filename):
    """Normalizes filename so it can be compared with other filenames.

    A filename that is not absolute is joined onto basedir and the result
    passed through os.path.normpath.  If basedir is empty or None, or if
    filename is already absolute, filename is returned as-is.
    """
    if not basedir or os.path.isabs(filename):
        return filename
    return os.path.normpath(os.path.join(basedir, filename))


def SortIncludesInFiles(files_to_process, flags):
    """For each file in files_to_process, sort its #includes.

    This reads each input file, sorts the #include lines, and replaces
    the input file with the result.  SortIncludesInFiles does not add
    or remove any #includes.  It also ignores forward-declares.

    Arguments:
      files_to_process: a list (or set) of filenames.
      flags: commandline flags, as parsed by argparse.  flags.basedir is
        used to normalize each filename; the flags are otherwise just
        passed on to FixManyFiles.

    Returns:
      The number of files that had to be modified (because they weren't
      already all correct, that is, already in sorted order).
    """
    # An empty iwyu record has no adds or deletes, so its only effect is to
    # cause the #include lines to be sorted.  (fix_includes learns where the
    # forward-declare lines are from the iwyu input, so with an empty record
    # it ignores forward-declares entirely.)
    sort_only_records = [
        IWYUOutputRecord(NormalizeFilePath(flags.basedir, filename))
        for filename in files_to_process
    ]
    return FixManyFiles(sort_only_records, flags)
def main(argv):
    """Command-line entry point.

    Parses argv, then either sorts the #includes of the files named on the
    command line (--sort_only) or applies the include-what-you-use output
    read from stdin, restricted to the named files if any were given.

    Arguments:
      argv: the full argument vector; argv[0] (the program name) is
        skipped when parsing.

    Returns:
      Always 0.  The counts returned by SortIncludesInFiles and
      ProcessIWYUOutput are not propagated.  The process exits through
      sys.exit with an error message if --sort_only is given without any
      filenames.
    """
    # Parse the command line.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Update files based on include-what-you-use output',
        epilog=_EPILOG)
    parser.add_argument('-b', '--blank_lines', action='store_true', default=True,
                        help=('Put a blank line between primary header file and'
                              ' C/C++ system #includes, and another blank line'
                              ' between system #includes and google #includes'
                              ' [default]'))
    parser.add_argument('--noblank_lines', action='store_false',
                        dest='blank_lines')

    parser.add_argument('--comments', action='store_true', default=False,
                        help='Put comments after the #include lines')
    parser.add_argument('--nocomments', action='store_false', dest='comments')

    parser.add_argument('--update_comments', action='store_true', default=False,
                        help=('Replace \'why\' comments with the ones provided by'
                              ' IWYU'))
    parser.add_argument('--noupdate_comments', action='store_false',
                        dest='update_comments')

    parser.add_argument('--safe_headers', action='store_true', default=True,
                        help=('Do not remove unused #includes/fwd-declares from'
                              ' header files; just add new ones [default]'))
    parser.add_argument('--nosafe_headers', action='store_false',
                        dest='safe_headers')

    parser.add_argument('--reorder', action='store_true', default=False,
                        help=('Re-order lines relative to other similar lines '
                              '(e.g. headers relative to other headers)'))
    parser.add_argument('--noreorder', action='store_false', dest='reorder',
                        help=('Do not re-order lines relative to other similar '
                              'lines.'))

    parser.add_argument('-s', '--sort_only', action='store_true',
                        help=('Just sort #includes of files listed on cmdline;'
                              ' do not add or remove any #includes'))

    # The help used to promise an exit code of min(number of files that
    # would be modified, 100); main() has no such behaviour (it always
    # returns 0), so the text no longer makes that claim.
    parser.add_argument('-n', '--dry_run', action='store_true', default=False,
                        help=('Do not actually edit any files; just print diffs.'))

    parser.add_argument('--ignore_re', default=None,
                        help=('%(prog)s will skip editing any file whose name'
                              ' matches this regular expression.'))

    parser.add_argument('--only_re', default=None,
                        help=('%(prog)s will skip editing any file whose name'
                              ' does not match this regular expression.'))

    parser.add_argument('--separate_project_includes', default=None,
                        help=('Sort #includes for current project separately'
                              ' from all other #includes. This flag specifies'
                              ' the root directory of the current project.'
                              ' If the value is "", #includes that share the'
                              ' same top-level directory are assumed to be in the'
                              ' same project. If not specified, project #includes'
                              ' will be sorted with other non-system #includes.'))

    parser.add_argument('-m', '--keep_iwyu_namespace_format', action='store_true',
                        default=False,
                        help=('Keep forward-declaration namespaces in IWYU format'
                              ', eg. namespace n1 { namespace n2 { class c1; } }.'
                              ' Do not convert to "normalized" Google format: '
                              'namespace n1 {\\nnamespace n2 {\\n class c1;'
                              '\\n}\\n}.'))
    parser.add_argument('--nokeep_iwyu_namespace_format', action='store_false',
                        dest='keep_iwyu_namespace_format')

    parser.add_argument('--basedir', '-p', default=None,
                        help=('Specify the base directory. fix_includes will '
                              'interpret non-absolute filenames relative to this '
                              'path.'))
    parser.add_argument('--quoted_includes_first', action='store_true',
                        default=False,
                        help='When sorting includes, place quoted ones first')

    parser.add_argument('files', nargs='*', metavar='FILES')

    flags = parser.parse_args(argv[1:])
    if flags.files:
        files_to_modify = set(flags.files)
    else:
        files_to_modify = None

    # A project root given as a directory must end in a path separator;
    # values starting with '<' are 'special' and are left untouched.
    if (flags.separate_project_includes and
            not flags.separate_project_includes.startswith('<') and
            not flags.separate_project_includes.endswith(os.path.sep) and
            not flags.separate_project_includes.endswith('/')):
        flags.separate_project_includes += os.path.sep

    # Updating the 'why' comments implies emitting comments at all.
    if flags.update_comments:
        flags.comments = True

    if flags.sort_only:
        if not files_to_modify:
            sys.exit('FATAL ERROR: -s flag requires a list of filenames')
        SortIncludesInFiles(files_to_modify, flags)
    else:
        ProcessIWYUOutput(sys.stdin, files_to_modify, flags, cwd=os.getcwd())

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))