Source code for testing.find_unadorned_strings

#!/usr/bin/env python3
# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4; encoding:utf-8 -*-
#
# Copyright 2018 Aaron Whitehouse <aaron@whitehouse.kiwi.nz>
#
# This file is part of duplicity.
#
# Duplicity is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# Duplicity is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with duplicity; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# For predictable results in python/3 all string literals need to be marked as unicode, bytes or raw
# This code finds all unadorned string literals (strings that are not marked with a u, b or r)

from __future__ import print_function

import sys
import tokenize
import token

# Unfortunately python does not have the useful named tuple result from tokenize.tokenize,
# so we have to recreate the effect using namedtuple and tokenize.generate_tokens
from collections import namedtuple
python_token = namedtuple(u'python_token', u'type string start end line')


[docs]def return_unadorned_string_tokens(f):
    if sys.version_info[0] < 3:
        unnamed_tokens = tokenize.generate_tokens(f.readline)
        for t in unnamed_tokens:
            named_token = python_token(token.tok_name[t[0]], *t[1:])
            if named_token.type == u"STRING" and named_token.string[0] in [u'"', u"'"]:
                yield named_token

    else:
        named_tokens = tokenize.tokenize(f.readline)
        for t in named_tokens:
            if t.type == token.STRING and t.string[0] in [u'"', u"'"]:
                yield t


[docs]def check_file_for_unadorned(python_file):
    unadorned_string_list = []
    with open(python_file, u'rb') as f:
        for s in return_unadorned_string_tokens(f):
            unadorned_string_list.append((python_file, s.start, s.end, s.string))
    return unadorned_string_list


if __name__ == u"__main__":
    import argparse

    parser = argparse.ArgumentParser(description=u'Find any unadorned string literals in a Python file')
    parser.add_argument(u'file', help=u'The file to search')
    args = parser.parse_args()

    unadorned_string_list = check_file_for_unadorned(args.file)
    if len(unadorned_string_list) == 0:
        print(u"There are no unadorned strings in", args.file)
    else:
        print(u"There are unadorned strings in", args.file, u"\n")
        for unadorned_string in unadorned_string_list:
            print(unadorned_string)
            python_file, string_start, string_end, string = unadorned_string
            print(string_start, string)