#!/usr/bin/env python3
import csv
import logging
import os
import pathlib
import re
from argparse import Action, ArgumentParser, FileType

import gnupg

logger = logging.getLogger(__name__)
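
# Note: the GPG(use_agent=..., gpgbinary=...) call below matches the
# python-gnupg package (imported as `gnupg`), which wraps the system
# gpg binary.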


class CSVExporter:
    def __init__(self, kpx_format, login_fields, get_url, exclude_rows):
        logging.basicConfig(level=logging.INFO)
        self.logger = logger

        # Set to True to create the alternate (KeePassXC) password CSV;
        # see the README for the differences.
        self.kpx_format = kpx_format

        if self.kpx_format:
            # A list of possible fields (in order) that could be converted
            # to login fields
            self.login_fields = login_fields or []
            # Set to True to extract url fields
            self.get_url = get_url
            # A list of regular expressions for lines that should be
            # excluded from the notes field
            self.exclude_rows = exclude_rows or []

        self.logger.info("Using KPX format: %s", self.kpx_format)

    def traverse(self, path):
        # Walk the password store and yield each file path, skipping the
        # .git directory.
        for root, dirs, files in os.walk(path):
            if '.git' in dirs:
                dirs.remove('.git')
            for name in files:
                yield os.path.join(root, name)

    def get_metadata(self, notes_raw):
        lines = notes_raw.split('\n')

        # A list of lines to keep as notes (will be joined by newlines)
        notes = []
        # The extracted user field
        user = ''
        # The extracted URL field
        url = ''

        # Extract each field name (for example, if a line in the notes
        # was `user: user1`, all_fields should contain 'user'). The
        # non-greedy (.*?) stops at the first colon so that values which
        # themselves contain colons (URLs, for instance) don't corrupt
        # the field name.
        all_fields = set()
        for line in lines:
            field_search = re.search(r'^(.*?) ?: ?.*$', line, re.I)
            if field_search:
                all_fields.add(field_search.group(1))

        # Check if any of the fields match the login names
        login_fields = [
            field for field in self.login_fields if field in all_fields
        ]
        # Get the field to use for the login. Since self.login_fields is
        # in order, the 0th element will contain the first match.
        login_field = None if not login_fields else login_fields[0]

        # Iterate through the lines again to build the return values
        for line in lines:
            # If any of the exclusion patterns match, ignore the line
            if any(re.search(pattern, line, re.I)
                   for pattern in self.exclude_rows):
                continue

            if login_field:
                # re.escape guards against field names that contain
                # regex metacharacters
                user_search = re.search(
                    '^' + re.escape(login_field) + ' ?: ?(.*)$', line, re.I)
                if user_search:
                    user = user_search.group(1)
                    # The user was matched, don't add it to notes
                    continue

            if self.get_url:
                url_search = re.search(r'^url ?: ?(.*)$', line, re.I)
                if url_search:
                    url = url_search.group(1)
                    # The url was matched, don't add it to notes
                    continue

            notes.append(line)

        return (user, url, '\n'.join(notes).strip())
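
    # Illustrative example (hypothetical values, not from the source):
    # with login_fields=['user'] and get_url=True, notes text of
    #   user: user1
    #   url: https://example.com
    #   some note
    # returns ('user1', 'https://example.com', 'some note').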

    def parse(self, basepath, path, data):
        p = pathlib.Path(path)
        name = p.stem
        self.logger.info("Processing %s", name)
        # Group entries by their directory relative to the store root
        group = os.path.dirname(os.path.relpath(path, basepath))
        # The first line is the password; anything after it is notes
        split_data = data.split('\n', maxsplit=1)
        password = split_data[0]
        # Use if/else in case there are no notes for an entry
        notes = split_data[1] if len(split_data) > 1 else ""
        if self.kpx_format:
            # We are using the advanced format; try extracting user and url
            user, url, notes = self.get_metadata(notes)
            return [group, name, user, password, url, notes]
        else:
            # We are not using the KPX format; just use the notes as-is
            return [group, name, password, notes]
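
    # Sketch of the resulting row (hypothetical entry): a file
    # sites/example.gpg that decrypts to "hunter2\nuser: alice" becomes
    # ['sites', 'example', 'alice', 'hunter2', '', ''] when kpx_format
    # is enabled with login_fields=['user'].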


def main(gpgbinary, use_agent, pass_path, base_path,
         kpx_format, login_fields, get_url, exclude_rows, outfile):
    exporter = CSVExporter(kpx_format, login_fields, get_url, exclude_rows)
    gpg = gnupg.GPG(use_agent=use_agent, gpgbinary=gpgbinary)
    gpg.encoding = 'utf-8'
    csv_data = []
    for file_path in exporter.traverse(pass_path):
        if os.path.splitext(file_path)[1] == '.gpg':
            with open(file_path, 'rb') as f:
                data = str(gpg.decrypt_file(f))
                if len(data) == 0:
                    logger.warning(
                        "Could not decrypt %s or it is empty.", file_path)
                base = base_path if base_path else pass_path
                parsed = exporter.parse(base, file_path, data)
                csv_data.append(parsed)

    writer = csv.writer(outfile, delimiter=',')
    writer.writerows(csv_data)
    outfile.close()
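
# The resulting CSV has no header row; with -x the columns are
# group, name, user, password, url, notes; otherwise they are
# group, name, password, notes.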


class ExtendAction(Action):
    # Python 3.8 has 'extend' built in; this backports it for older
    # versions by appending each new value onto the accumulated list.
    def __call__(self, parser, namespace, values, option_string=None):
        items = getattr(namespace, self.dest) or []
        items.extend(values)
        setattr(namespace, self.dest, items)
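
# With this action and nargs='+', repeated options accumulate; for example
# (hypothetical invocation), `-l user login -l email` collects
# ['user', 'login', 'email'] into a single list.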


class OptionsParser(ArgumentParser):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.add_argument(
            'pass_path',
            metavar='path',
            type=str,
            help="path to the password-store folder to export",
        )

        self.add_argument(
            '-b', '--base',
            type=str,
            help="path to use as base for grouping passwords",
            dest='base_path',
        )

        self.add_argument(
            '-a', '--agent',
            action='store_true',
            help="ask gpg to use its auth agent",
            dest='use_agent',
        )

        self.add_argument(
            '-g', '--gpgbinary',
            type=str,
            help="path to the gpg binary you wish to use",
            dest='gpgbinary',
            default="gpg",
        )

        self.add_argument(
            '-o', '--outfile',
            type=FileType('w'),
            help="write the output to a file instead of stdout",
            dest='outfile',
            default="-",
        )

        self.add_argument(
            '-x', '--kpx',
            action='store_true',
            help="format the CSV for KeePassXC",
            dest='kpx_format',
        )

        self.add_argument(
            '-l', '--login-fields',
            action=ExtendAction,
            nargs='+',
            type=str,
            help="strings to interpret as names of login fields (only used with -x)",
        )

        self.add_argument(
            '-u', '--get-url',
            action='store_true',
            help="match rows starting with 'url:' and extract them (only used with -x)",
        )

        self.add_argument(
            '-e', '--exclude-rows',
            action=ExtendAction,
            nargs='+',
            type=str,
            help="regexps for lines to exclude from the notes field (only used with -x)",
        )


if __name__ == '__main__':
    PARSER = OptionsParser()
    ARGS = PARSER.parse_args()
    main(**vars(ARGS))
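
# Example invocation (hypothetical paths; assumes this file is saved as
# pass2csv.py and is executable):
#   ./pass2csv.py ~/.password-store -x -l user -l login -u -o passwords.csv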