#!/usr/bin/env python3
import argparse
import csv
import pathlib
import re
import sys
from collections import OrderedDict

import gnupg

__version__ = '1.1.1'

def stderr(s, *args, **kwargs):
    print(s, *args, file=sys.stderr, **kwargs)

def set_meta(entry, path, grouping_base):
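    """Derive the entry's group (folder) and title from its path.

    For example (paths illustrative): with grouping_base
    '~/.password-store', the file '~/.password-store/web/github.gpg'
    yields group 'web' and title 'github'; files at the store root get
    an empty group.
    """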
    pure_path = pathlib.PurePath(path)
    group = pure_path.relative_to(grouping_base).parent
    if group.name == '':
        group = ''
    entry['group'] = group
    entry['title'] = pure_path.stem

def set_data(entry, data, exclude, get_fields, get_lines):
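    """Split decrypted pass data into password, custom fields and notes.

    By pass convention the first line is the password; the remaining
    lines are filtered through the exclude patterns, then matched against
    the get_fields and get_lines patterns, and whatever is left over
    becomes the notes. For example (made-up entry), with the default
    patterns the data

        hunter2
        username: alice
        https://example.com

    yields password 'hunter2' and the fields 'username' ('alice') and
    'url' ('https://example.com'), leaving empty notes.
    """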
    lines = data.splitlines()
    tail = lines[1:]
    entry['password'] = lines[0]
    filtered_tail = []
    for line in tail:
        for exclude_pattern in exclude:
            if exclude_pattern.search(line):
                break
        else:
            filtered_tail.append(line)
    matching_indices = set()
    fields = entry.setdefault('fields', {})
    for i, line in enumerate(filtered_tail):
        for name, pattern in get_fields:
            if name in fields:
                # multiple patterns with same name, we've already found a match
                continue
            match = pattern.search(line)
            if not match:
                continue
            inverse_match = line[0:match.start()] + line[match.end():]
            value = inverse_match.strip()
            fields[name] = value
            matching_indices.add(i)
            break
    matching_lines = {}
    for i, line in enumerate(filtered_tail):
        for name, pattern, subc in get_lines:
            match = pattern.search(line)
            if not match:
                continue
            matches = matching_lines.setdefault(name, [])
            if subc is not None:
                line = subc.sub('', line)
            matches.append(line)
            matching_indices.add(i)
            break
    for name, matches in matching_lines.items():
        fields[name] = '\n'.join(matches)
    final_tail = []
    for i, line in enumerate(filtered_tail):
        if i not in matching_indices:
            final_tail.append(line)
    entry['notes'] = '\n'.join(final_tail).strip()

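# Bitwarden CSV columns mapped to internal entry keys: None produces an
# empty column, '__fields' is filled later with the leftover custom fields.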
bwFieldSet = OrderedDict({
    'folder': 'group',
    'favorite': None,
    'type': 'type',
    'name': 'title',
    'notes': 'notes',
    'fields': '__fields',
    'reprompt': 'reprompt',
    'login_uri': 'url',
    'login_username': 'username',
    'login_password': 'password',
    'login_totp': 'otp',
})

header = list(bwFieldSet)

def write(file, entries, get_fields, get_lines, notes, ignore, usernames, no_degroup):
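    """Write entries to file as Bitwarden-compatible CSV, skipping titles
    listed in ignore and degrouping usernames where requested."""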
    get_field_names = set(x[0] for x in get_fields)
    get_line_names = set(x[0] for x in get_lines)
    field_names = get_field_names | get_line_names
    csvw = csv.writer(file, dialect='unix')
    stderr(f"\nWriting data to {file.name}\n")
    csvw.writerow(header)
    for entry in entries:
        if entry['title'] in ignore:
            continue
        columns = []
        fieldIndex = -1
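        # Degrouping: when the title embeds a username (e.g. a hypothetical
        # entry 'example.com/alice'), move the match into the username field,
        # rename the entry after its group, clear the folder, and keep the
        # original pass path in a custom 'passEntry' field.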
        if entry['group'] not in no_degroup:
            for upat in usernames:
                m = upat.search(entry['title'])
                if m:
                    # username match, clean up the entry
                    if entry['fields'].get('username') in ['', None]:
                        entry['fields']['username'] = m[0]
                    else:
                        entry['fields']['titleuser'] = m[0]
                    oldTitle = entry['title']
                    entry['title'] = upat.sub(str(entry['group']), str(entry['title'])).rstrip(":")
                    entry['fields']['passEntry'] = '{}/{}'.format(entry['group'], oldTitle)
                    entry['group'] = ''
        for bwf, pf in bwFieldSet.items():
            if pf is None:
                columns.append('')
            elif pf == 'type':
                if entry['title'] in notes:
                    columns.append('note')
                else:
                    columns.append('login')
            elif pf == '__fields':
                columns.append('')
                fieldIndex = len(columns) - 1
            elif pf in ['group', 'title', 'notes', 'password']:
                try:
                    columns.append(entry[pf])
                except KeyError:
                    columns.append('')
            else:
                try:
                    columns.append(entry['fields'][pf])
                    del entry['fields'][pf]
                except KeyError:
                    columns.append('')
        # get remaining fields
        fields = ""
        for fKey, value in entry['fields'].items():
            fields += "{}: {}\n".format(fKey, value)
        columns[fieldIndex] = fields.rstrip('\n')
        csvw.writerow(columns)

def main(store_path, outfile, grouping_base, gpgbinary, use_agent, encodings,
         exclude, get_fields, get_lines, notes, ignore, usernames, no_degroup):
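    """Decrypt every .gpg file under store_path (or a single .gpg file)
    and hand the parsed entries to write()."""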
    entries = []
    failures = []
    path = pathlib.Path(store_path)
    grouping_path = pathlib.Path(grouping_base)
    gpg = gnupg.GPG(gpgbinary=gpgbinary, use_agent=use_agent)
    files = path.glob('**/*.gpg')
    if not path.is_dir():
        if path.is_file():
            files = [path]
        else:
            stderr(f"No such file or directory: {path}")
            sys.exit(1)
    for file in files:
        stderr(f"Processing {file}")
        with open(file, 'rb') as fp:
            decrypted = gpg.decrypt_file(fp)
        if not decrypted.ok:
            err = f"Could not decrypt {file}: {decrypted.status}"
            stderr(err)
            failures.append(err)
            continue
        for i, encoding in enumerate(encodings):
            try:
                # decrypted.data is bytes
                decrypted_data = decrypted.data.decode(encoding)
            except Exception as e:
                stderr(f"Could not decode {file} with encoding {encoding}: {e}")
                continue
            if i > 0:
                # don't log if the first encoding worked
                stderr(f"Decoded {file} with encoding {encoding}")
            break
        else:
            err = f"Could not decode {file}, see messages above for more info."
            failures.append(err)
            continue
        entry = {}
        set_meta(entry, file, grouping_path)
        set_data(entry, decrypted_data, exclude, get_fields, get_lines)
        entries.append(entry)
    if failures:
        stderr("\nGot errors while processing files:")
        for err in failures:
            stderr(err)
    if not entries:
        stderr("\nNothing to write.")
        sys.exit(1)
    write(outfile, entries, get_fields, get_lines, notes, ignore, usernames, no_degroup)

def parse_args(args=None):
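    """Build the argument parser and parse args (defaults to sys.argv)."""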
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'store_path',
        metavar='STOREPATH',
        type=str,
        help="path to the password-store to export",
    )
    parser.add_argument(
        'outfile',
        metavar='OUTFILE',
        type=argparse.FileType('w'),
        help="file to write exported data to, use - for stdout",
    )
    parser.add_argument(
        '-b', '--base',
        metavar='path',
        type=str,
        help="path to use as the base for grouping passwords",
        dest='base_path',
    )
    parser.add_argument(
        '-g', '--gpg',
        metavar='executable',
        type=str,
        default="gpg",
        help="path to the gpg binary you wish to use (default: '%(default)s')",
        dest='gpgbinary',
    )
    parser.add_argument(
        '-a', '--use-agent',
        action='store_true',
        default=False,
        help="ask gpg to use its auth agent",
        dest='use_agent',
    )
    parser.add_argument(
        '--encodings',
        metavar='encodings',
        type=str,
        default="utf-8",
        help=(
            "comma-separated text encodings to try, in order, when decoding"
            " gpg output (default: '%(default)s')"
        ),
        dest='encodings',
    )
    parser.add_argument(
        '-e', '--exclude',
        metavar='pattern',
        action='append',
        type=str,
        default=['^autotype:'],
        help=(
            "regexp for lines which should not be exported, can be specified"
            " multiple times"
        ),
        dest='exclude',
    )
    parser.add_argument(
        '-f', '--get-field',
        metavar=('name', 'pattern'),
        action='append',
        nargs=2,
        type=str,
        default=[['pin', '^(phone)?pin: '], ['username', '^(user(name)?|login): ']],
        help=(
            "a name and a regexp; the part of the line matching the regexp"
            " is removed and the remainder of the line is stored in a field"
            " with the chosen name. only one match per password entry;"
            " matching stops after the first match"
        ),
        dest='get_fields',
    )
    parser.add_argument(
        '-l', '--get-line',
        metavar=('name', 'pattern', 'replace'),
        action='append',
        nargs=3,
        type=str,
        default=[['otp', 'otpauth://', ''], ['url', 'https?://', '^url: *']],
        help=(
            "a name and a regexp; all lines matching the regexp are"
            " collected into a field with the chosen name. replace is a"
            " regexp whose matches are stripped from each collected line"
            " (pass '' to keep the lines unchanged)"
        ),
        dest='get_lines',
    )
    parser.add_argument(
        '--version',
        action='version',
        version='%(prog)s ' + __version__,
    )
    parser.add_argument(
        '-n', '--note',
        metavar='title',
        action='append',
        type=str,
        default=[],
        help=(
            "the title of an entry that should be exported as a note,"
            " can be specified multiple times"
        ),
        dest='notes',
    )
    parser.add_argument(
        '-i', '--ignore',
        metavar='title',
        action='append',
        type=str,
        default=[],
        help=(
            "the title of an entry that should be ignored,"
            " can be specified multiple times"
        ),
        dest='ignore',
    )
    parser.add_argument(
        '-u', '--username',
        metavar='pattern',
        action='append',
        type=str,
        default=[],
        help=(
            "a regexp matching a username embedded in an entry's title,"
            " can be specified multiple times; any match is replaced with"
            " the group name"
        ),
        dest='usernames',
    )
    parser.add_argument(
        '-D', '--no-degroup',
        metavar='group',
        action='append',
        type=str,
        default=[],
        help=(
            "a group on which username degrouping should not be performed,"
            " can be specified multiple times"
        ),
        dest='no_degroup',
    )
    return parser.parse_args(args)

def compile_regexp(pattern):
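    """Compile pattern case-insensitively; log and return None on error."""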
    try:
        regexp = re.compile(pattern, re.I)
    except re.error as e:
        escaped = pattern.replace("'", "\\'")
        stderr(f"Could not compile pattern '{escaped}', {e.msg} at position {e.pos}")
        return None
    return regexp

def cli():
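    """Entry point: compile the CLI regexps and run main()."""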
    parsed = parse_args()
    failed = False
    exclude_patterns = []
    for pattern in parsed.exclude:
        regexp = compile_regexp(pattern)
        if not regexp:
            failed = True
        exclude_patterns.append(regexp)
    get_fields = []
    for name, pattern in parsed.get_fields:
        regexp = compile_regexp(pattern)
        if not regexp:
            failed = True
        get_fields.append((name, regexp))
    get_lines = []
    for name, pattern, subs in parsed.get_lines:
        regexp = compile_regexp(pattern)
        if not regexp:
            failed = True
        if subs == '':
            subc = None
        else:
            subc = compile_regexp(subs)
            if not subc:
                failed = True
        get_lines.append((name, regexp, subc))
    username_patterns = []
    for pattern in parsed.usernames:
        regexp = compile_regexp(pattern)
        if not regexp:
            failed = True
        username_patterns.append(regexp)
    if failed:
        sys.exit(1)
    if parsed.base_path:
        grouping_base = parsed.base_path
    else:
        grouping_base = parsed.store_path
    encodings = [e for e in parsed.encodings.split(',') if e]
    if not encodings:
        stderr(f"Did not understand '--encodings {parsed.encodings}'")
        sys.exit(1)
    kwargs = {
        'store_path': parsed.store_path,
        'outfile': parsed.outfile,
        'grouping_base': grouping_base,
        'gpgbinary': parsed.gpgbinary,
        'use_agent': parsed.use_agent,
        'encodings': encodings,
        'exclude': exclude_patterns,
        'get_fields': get_fields,
        'get_lines': get_lines,
        'notes': parsed.notes,
        'ignore': parsed.ignore,
        'usernames': username_patterns,
        'no_degroup': parsed.no_degroup,
    }
    main(**kwargs)

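# Example invocation (paths and patterns are illustrative, not defaults):
#   ./pass2csv.py ~/.password-store bitwarden.csv -n 'recovery codes' \
#       -u '\S+@\S+'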
if __name__ == '__main__':
    cli()