Full rewrite with new flexible functionality

This commit is contained in:
Rupus Reinefjord 2021-05-19 21:30:10 +02:00
parent 4418d476f7
commit c130ca7ee1
3 changed files with 320 additions and 250 deletions

.gitignore

@@ -1,3 +1,7 @@
 pass.csv
-venv/
 __pycache__/
+*.egg-info/
+build/
+dist/
+venv/

README.md

@@ -11,92 +11,94 @@ You can install it directly from PyPI with pip:
 ```
 $ pass2csv --help
-usage: pass2csv [-h] [-a] [-b GPGBINARY] [-x] [-l LOGIN_FIELDS [LOGIN_FIELDS ...]] [-u]
-                [-e EXCLUDE_ROWS [EXCLUDE_ROWS ...]]
-                path
+usage: pass2csv [-h] [-b path] [-g executable] [-a] [--encoding encoding] [-o file]
+                [-e pattern [pattern ...]] [-f name pattern] [-l name pattern]
+                store_path
 
 positional arguments:
-  path                  path to the password-store folder to export
+  store_path            path to the password-store to export
 
 optional arguments:
   -h, --help            show this help message and exit
-  -a, --agent           ask gpg to use its auth agent
-  -b GPGBINARY, --gpgbinary GPGBINARY
-                        path to the gpg binary you wish to use
-  -x, --kpx             format the CSV for KeePassXC
-  -l LOGIN_FIELDS [LOGIN_FIELDS ...], --login-fields LOGIN_FIELDS [LOGIN_FIELDS ...]
-                        strings to interpret as names of login fields (only used with -x)
-  -u, --get-url         match row starting with 'url:' and extract it (only used with -x)
-  -e EXCLUDE_ROWS [EXCLUDE_ROWS ...], --exclude-rows EXCLUDE_ROWS [EXCLUDE_ROWS ...]
-                        regexps to exclude from the notes field (only used with -x)
+  -b path, --base path  path to use as base for grouping passwords
+  -g executable, --gpg executable
+                        path to the gpg binary you wish to use (default 'gpg')
+  -a, --use-agent       ask gpg to use its auth agent
+  --encoding encoding   text encoding to use when reading gpg output (default
+                        'utf-8')
+  -o file, --outfile file
+                        file to write exported data to (default stdout)
+  -e pattern [pattern ...], --exclude pattern [pattern ...]
+                        regexps for lines which should not be exported
+  -f name pattern, --get-field name pattern
+                        a name and a regexp; the part of the line matching the
+                        regexp will be removed and the remaining line will be
+                        added to a field with the chosen name. Only one match
+                        per password; matching stops after the first match
+  -l name pattern, --get-line name pattern
+                        a name and a regexp for which all lines that match are
+                        included in a field with the chosen name
 ```
 
-## Export format
-
-There are two ways to export CSV data:
-
-1. The format for the KeePass Generic CSV Importer:
-
-       Group(/),Title,Password,Notes
-
-   Where 'Password' is the first line of the entry in `pass` and
-   'Notes' are all subsequent lines. '\\' should not be interpreted as
-   an escape character.
-
-   This is the default mode.
-
-2. The format for the KeePassXC Importer:
-
-       Group(/),Title,Login,Password,URL,Notes
-
-   Where 'Password' is the first line of the entry in `pass`, 'User' is
-   configured with `-l`, URL is extracted if `-u` is set, and 'Notes'
-   contains any other fields that do not match `-e`.
-
-   The 'User' field is chosen by searching for the first field with a
-   name set by `-l`. Once the field is found, the login is set and the
-   field is removed from notes.
-
-   Use `-x` or `--kpx` to enable this mode.
+### Format
+
+The output format is
+
+    Group(/),Title,Password,[custom fields...],Notes
+
+You may add custom fields with `--get-field` or `--get-line`. You
+supply a name for the field and a regexp pattern. The field name is
+used for the header of the output CSV and to group multiple patterns
+for the same field; you may specify multiple patterns for the same
+field by specifying `--get-field` or `--get-line` multiple times with
+the same name. Regexps are case-insensitive.
 
-### Example KeePassXC Import
-
-- Cmd line
-
-      pass2csv ~/.password-store -x -l username login email -u -e '^---$'
-
-- Password entry (`sites/example`)
-
-      password123
-      ---
-      username: user_name
-      email: user@example.com
-      url: example.com
-      some_note
-
-- Output CSV row (formatted)
-
-      sites, example, user_name, password123, example.com, "email: user@example.com\nsome_note"
-
-  - `user_name` was chosen because `username` was the first argument to `-l`.
-  - Both login and URL fields were excluded from the notes field because
-    they were used in another field.
-  - `---` was not included in the notes field because it was matched by `-e`.
-
-### Example KeePass Generic CSV Importer
-
-- Cmd line
-
-      pass2csv ~/.password-store
-
-- Password entry: Same as above
-
-- Output CSV row (formatted)
-
-      sites, example, password123, "---\nusername: user_name\nemail: user@example.com\nurl: example.com\nsome_note"
+### Examples
+
+* Password entry (`~/.password-store/sites/example/login.gpg`):
+
+      password123
+      ---
+      username: user_name
+      email user@example.com
+      url:example.com
+      Some note
+
+* Command
+
+      pass2csv ~/.password-store \
+          --exclude '^---$' \
+          --get-field Username '(username|email):?' \
+          --get-field URL 'url:?'
+
+* Output
+
+      Group(/),Title,Password,URL,Username,Notes
+      sites/example,login,password123,example.com,user_name,"email user@example.com\nSome note"
+
+### Grouping
+
+The group is relative to the store path, or to `--base` if given.
+Given the password `~/.password-store/site/login/password.gpg`:
+
+    $ pass2csv ~/.password-store/site
+    # Password will have group "login"
+
+    $ pass2csv ~/.password-store/site --base ~/.password-store
+    # Password will have group "site/login"
 
 ## Development
 
 Create a virtual environment:
 
     python3 -m venv venv
@@ -115,6 +117,7 @@ you can install `pip-tools`. The latter is recommended.
 
 ### pip-tools
 
 [pip-tools](https://github.com/jazzband/pip-tools) can keep your virtual
 environment in sync with the `requirements.txt` file, as well as
 compiling a new `requirements.txt` when adding/removing a dependency in
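The `--get-field` rule the README describes (the part of a line that matches the pattern is removed, and the stripped remainder becomes the field value; patterns are case-insensitive) can be sketched in isolation. The `extract_field` helper below is hypothetical, for illustration only, and is not part of pass2csv:

```python
import re

def extract_field(line, pattern):
    """Sketch of the --get-field rule: remove the matching part of the
    line and return the stripped remainder as the field value, or None
    if the pattern does not match. Hypothetical helper, not pass2csv's API."""
    match = pattern.search(line)
    if match is None:
        return None
    return (line[:match.start()] + line[match.end():]).strip()

# Patterns are compiled case-insensitively, as in pass2csv.
username = re.compile(r'(username|email):?', re.I)

print(extract_field('username: user_name', username))       # user_name
print(extract_field('email user@example.com', username))    # user@example.com
print(extract_field('Some note', username))                 # None
```

This mirrors why the README example maps both `username: user_name` and `email user@example.com` to the `Username` field: the first matching line wins, and only the non-matching remainder is kept.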

pass2csv

@@ -1,214 +1,277 @@
 #!/usr/bin/env python3
+import argparse
 import csv
 import logging
-import os
-import re
 import pathlib
-from argparse import Action, ArgumentParser, FileType
+import re
+import sys
 
 import gnupg
 
-logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
 
 
-class CSVExporter:
-    def __init__(self, kpx_format, login_fields, get_url, exclude_rows):
-        logging.basicConfig(level=logging.INFO)
-        self.logger = logger
-
-        # Set to True to allow for alternate password csv to be created
-        # See README for differences
-        self.kpx_format = kpx_format
-
-        if self.kpx_format:
-            # A list of possible fields (in order) that could be converted to
-            # login fields
-            self.login_fields = login_fields or []
-            # Set to True to extract url fields
-            self.get_url = get_url
-            # A regular expression list of lines that should be excluded from
-            # the notes field
-            self.exclude_rows = exclude_rows or []
-
-        self.logger.info("Using KPX format: %s", self.kpx_format)
-
-    def traverse(self, path):
-        for root, dirs, files in os.walk(path):
-            if '.git' in dirs:
-                dirs.remove('.git')
-            for name in files:
-                yield os.path.join(root, name)
-
-    def get_metadata(self, notes_raw):
-        lines = notes_raw.split('\n')
-
-        # A list of lines to keep as notes (will be joined by newline)
-        notes = []
-        # The extracted user field
-        user = ''
-        # The extracted URL field
-        url = ''
-
-        # This will extract each field name (for example, if a line in notes
-        # was `user: user1`, fields should contain 'user')
-        all_fields = set()
-        for line in lines:
-            field_search = re.search('^(.*) ?: ?.*$', line, re.I)
-            if field_search:
-                all_fields.add(field_search.group(1))
-
-        # Check if any of the fields match the login names
-        login_fields = [
-            field for field in self.login_fields if field in all_fields
-        ]
-        # Get the field to use for the login. Since self.login_fields is in order,
-        # the 0th element will contain the first match
-        login_field = None if not login_fields else login_fields[0]
-
-        # Iterate through the file again to build the return array
-        for line in lines:
-            # If any of the exclusion patterns match, ignore the line
-            if [pattern for pattern in self.exclude_rows if re.search(pattern, line, re.I)]:
-                continue
-
-            if login_field:
-                user_search = re.search(
-                    '^' + login_field + ' ?: ?(.*)$', line, re.I)
-                if user_search:
-                    user = user_search.group(1)
-                    # The user was matched, don't add it to notes
-                    continue
-
-            if self.get_url:
-                url_search = re.search('^url ?: ?(.*)$', line, re.I)
-                if url_search:
-                    url = url_search.group(1)
-                    # The url was matched, don't add it to notes
-                    continue
-
-            notes.append(line)
-
-        return (user, url, '\n'.join(notes).strip())
-
-    def parse(self, basepath, path, data):
-        p = pathlib.Path(path)
-        name = p.stem
-        self.logger.info("Processing %s", name)
-        group = os.path.dirname(os.path.relpath(path, basepath))
-        split_data = data.split('\n', maxsplit=1)
-        password = split_data[0]
-        # Perform if/else in case there are no notes for a field
-        notes = split_data[1] if len(split_data) > 1 else ""
-        if self.kpx_format:
-            # We are using the advanced format; try extracting user and url
-            user, url, notes = self.get_metadata(notes)
-            return [group, name, user, password, url, notes]
-        else:
-            # We are not using KPX format; just use notes
-            return [group, name, password, notes]
-
-
-def main(gpgbinary, use_agent, pass_path, base_path,
-         kpx_format, login_fields, get_url, exclude_rows, outfile):
-    exporter = CSVExporter(kpx_format, login_fields, get_url, exclude_rows)
-    gpg = gnupg.GPG(use_agent=use_agent, gpgbinary=gpgbinary)
-    gpg.encoding = 'utf-8'
-    csv_data = []
-    for file_path in exporter.traverse(pass_path):
-        if os.path.splitext(file_path)[1] == '.gpg':
-            with open(file_path, 'rb') as f:
-                data = str(gpg.decrypt_file(f))
-                if len(data) == 0:
-                    logger.warning("Could not decrypt %s or it is empty.", file_path)
-                base = base_path if base_path else pass_path
-                parsed = exporter.parse(base, file_path, data)
-                csv_data.append(parsed)
-
-    writer = csv.writer(outfile, delimiter=',')
-    writer.writerows(csv_data)
-    outfile.close()
-
-
-class ExtendAction(Action):
-    # Python 3.8 has 'extend' built in.
-    def __call__(self, parser, namespace, values, option_string=None):
-        items = getattr(namespace, self.dest) or []
-        items.extend(values)
-        setattr(namespace, self.dest, items)
-
-
-class OptionsParser(ArgumentParser):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        self.add_argument(
-            'pass_path',
-            metavar='path',
-            type=str,
-            help="path to the password-store folder to export",
-        )
-
-        self.add_argument(
-            '-b', '--base',
-            type=str,
-            help="path to use as base for grouping passwords",
-            dest='base_path'
-        )
-
-        self.add_argument(
-            '-a', '--agent',
-            action='store_true',
-            help="ask gpg to use its auth agent",
-            dest='use_agent',
-        )
-
-        self.add_argument(
-            '-g', '--gpgbinary',
-            type=str,
-            help="path to the gpg binary you wish to use",
-            dest='gpgbinary',
-            default="gpg"
-        )
-
-        self.add_argument(
-            '-o', '--outfile',
-            type=FileType('w'),
-            help="Store to an output file",
-            dest='outfile',
-            default="-"
-        )
-
-        self.add_argument(
-            '-x', '--kpx',
-            action='store_true',
-            help="format the CSV for KeePassXC",
-            dest='kpx_format',
-        )
-
-        self.add_argument(
-            '-l', '--login-fields',
-            action=ExtendAction,
-            nargs='+',
-            type=str,
-            help="strings to interpret as names of login fields (only used with -x)"
-        )
-
-        self.add_argument(
-            '-u', '--get-url',
-            action='store_true',
-            help="match row starting with 'url:' and extract it (only used with -x)"
-        )
-
-        self.add_argument(
-            '-e', '--exclude-rows',
-            action=ExtendAction,
-            nargs='+',
-            type=str,
-            help="regexps to exclude from the notes field (only used with -x)"
-        )
-
-
-if __name__ == '__main__':
-    PARSER = OptionsParser()
-    ARGS = PARSER.parse_args()
-    main(**vars(ARGS))
+def set_meta(entry, path, grouping_base):
+    pure_path = pathlib.PurePath(path)
+    group = pure_path.relative_to(grouping_base).parent
+    if group.name == '':
+        group = ''
+    entry['group'] = group
+    entry['title'] = pure_path.stem
+
+
+def set_data(entry, data, exclude, get_fields, get_lines):
+    lines = data.splitlines()
+    tail = lines[1:]
+    entry['password'] = lines[0]
+
+    filtered_tail = []
+    for line in tail:
+        for exclude_pattern in exclude:
+            if exclude_pattern.search(line):
+                break
+        else:
+            filtered_tail.append(line)
+
+    matching_indices = set()
+    fields = entry.setdefault('fields', {})
+
+    for i, line in enumerate(filtered_tail):
+        for (name, pattern) in get_fields:
+            if name in fields:
+                # multiple patterns with same name, we've already found a match
+                continue
+            match = pattern.search(line)
+            if not match:
+                continue
+            inverse_match = line[0:match.start()] + line[match.end():]
+            value = inverse_match.strip()
+            fields[name] = value
+            matching_indices.add(i)
+            break
+
+    matching_lines = {}
+    for i, line in enumerate(filtered_tail):
+        for (name, pattern) in get_lines:
+            match = pattern.search(line)
+            if not match:
+                continue
+            matches = matching_lines.setdefault(name, [])
+            matches.append(line)
+            matching_indices.add(i)
+            break
+
+    for name, matches in matching_lines.items():
+        fields[name] = '\n'.join(matches)
+
+    final_tail = []
+    for i, line in enumerate(filtered_tail):
+        if i not in matching_indices:
+            final_tail.append(line)
+
+    entry['notes'] = '\n'.join(final_tail).strip()
+
+
+def write(file, entries, get_fields, get_lines):
+    get_field_names = set(x[0] for x in get_fields)
+    get_line_names = set(x[0] for x in get_lines)
+    field_names = get_field_names | get_line_names
+    header = ["Group(/)", "Title", "Password", *field_names, "Notes"]
+    csvw = csv.writer(file)
+    logging.info("Writing data to %s", file.name)
+    csvw.writerow(header)
+    for entry in entries:
+        fields = [entry['fields'].get(name) for name in field_names]
+        columns = [
+            entry['group'], entry['title'], entry['password'],
+            *fields,
+            entry['notes']
+        ]
+        csvw.writerow(columns)
+
+
+def main(store_path, grouping_base, outfile, gpgbinary, use_agent, encoding,
+         exclude, get_fields, get_lines):
+    entries = []
+    failures = []
+    path = pathlib.Path(store_path)
+    grouping_path = pathlib.Path(grouping_base)
+    gpg = gnupg.GPG(gpgbinary=gpgbinary, use_agent=use_agent)
+    gpg.encoding = encoding
+    for file in path.glob('**/*.gpg'):
+        logging.info("Processing %s", file)
+        with open(file, 'rb') as fp:
+            decrypted = gpg.decrypt_file(fp)
+            if not decrypted.ok:
+                logging.error("Could not decrypt %s: %s", file, decrypted.status)
+                failures.append((file, decrypted.status))
+                continue
+        entry = {}
+        set_meta(entry, file, grouping_path)
+        set_data(entry, str(decrypted), exclude, get_fields, get_lines)
+        entries.append(entry)
+
+    if failures:
+        for file, status in failures:
+            logging.warning("Could not decrypt %s: %s", file, status)
+
+    write(outfile, entries, get_fields, get_lines)
+
+
+def parse_args(args):
+    class ExtendAction(argparse.Action):
+        # Python 3.8 has 'extend' built in.
+        def __call__(self, parser, namespace, values, option_string=None):
+            items = getattr(namespace, self.dest) or []
+            items.extend(values)
+            setattr(namespace, self.dest, items)
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'store_path',
+        type=str,
+        help="path to the password-store to export",
+    )
+
+    parser.add_argument(
+        '-b', '--base',
+        metavar='path',
+        type=str,
+        help="path to use as base for grouping passwords",
+        dest='base_path'
+    )
+
+    parser.add_argument(
+        '-g', '--gpg',
+        metavar='executable',
+        type=str,
+        default="gpg",
+        help="path to the gpg binary you wish to use (default 'gpg')",
+        dest='gpgbinary'
+    )
+
+    parser.add_argument(
+        '-a', '--use-agent',
+        action='store_true',
+        default=False,
+        help="ask gpg to use its auth agent",
+        dest='use_agent'
+    )
+
+    parser.add_argument(
+        '--encoding',
+        metavar='encoding',
+        type=str,
+        default="utf-8",
+        help="text encoding to use when reading gpg output (default 'utf-8')",
+        dest='encoding'
+    )
+
+    parser.add_argument(
+        '-o', '--outfile',
+        metavar='file',
+        type=argparse.FileType('w'),
+        default="-",
+        help="file to write exported data to (default stdout)",
+        dest='outfile'
+    )
+
+    parser.add_argument(
+        '-e', '--exclude',
+        metavar='pattern',
+        action=ExtendAction,
+        nargs='+',
+        type=str,
+        default=[],
+        help="regexps for lines which should not be exported",
+        dest='exclude'
+    )
+
+    parser.add_argument(
+        '-f', '--get-field',
+        metavar=('name', 'pattern'),
+        action='append',
+        nargs=2,
+        type=str,
+        default=[],
+        help=(
+            "a name and a regexp, the part of the line matching the regexp"
+            " will be removed and the remaining line will be added to a field"
+            " with the chosen name. only one match per password, matching"
+            " stops after the first match"
+        ),
+        dest='get_fields'
+    )
+
+    parser.add_argument(
+        '-l', '--get-line',
+        metavar=('name', 'pattern'),
+        action='append',
+        nargs=2,
+        type=str,
+        default=[],
+        help=(
+            "a name and a regexp for which all lines that match are included"
+            " in a field with the chosen name"
+        ),
+        dest='get_lines'
+    )
+
+    return parser.parse_args(args)
+
+
+def compile_regexp(pattern):
+    try:
+        regexp = re.compile(pattern, re.I)
+    except re.error as e:
+        logging.error(
+            "Could not compile pattern '%s', %s at position %s",
+            pattern.replace("'", "\\'"), e.msg, e.pos
+        )
+        return None
+    return regexp
+
+
+if __name__ == '__main__':
+    parsed = parse_args(sys.argv[1:])
+
+    failed = False
+    exclude_patterns = []
+    for pattern in parsed.exclude:
+        regexp = compile_regexp(pattern)
+        if not regexp:
+            failed = True
+        exclude_patterns.append(regexp)
+
+    get_fields = []
+    for (name, pattern) in parsed.get_fields:
+        regexp = compile_regexp(pattern)
+        if not regexp:
+            failed = True
+        get_fields.append((name, regexp))
+
+    get_lines = []
+    for (name, pattern) in parsed.get_lines:
+        regexp = compile_regexp(pattern)
+        if not regexp:
+            failed = True
+        get_lines.append((name, regexp))
+
+    if failed:
+        sys.exit(1)
+
+    if parsed.base_path:
+        grouping_base = parsed.base_path
+    else:
+        grouping_base = parsed.store_path
+
+    kwargs = {
+        'store_path': parsed.store_path,
+        'grouping_base': grouping_base,
+        'gpgbinary': parsed.gpgbinary,
+        'use_agent': parsed.use_agent,
+        'encoding': parsed.encoding,
+        'outfile': parsed.outfile,
+        'exclude': exclude_patterns,
+        'get_fields': get_fields,
+        'get_lines': get_lines
+    }
+
+    main(**kwargs)
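The grouping rule introduced by this commit (`set_meta` makes the group the entry's parent directory relative to the store path, or to `--base` when given) can be exercised in isolation. The `group_for` wrapper below is hypothetical, written only to mirror that logic:

```python
import pathlib

def group_for(entry_path, grouping_base):
    # Mirrors set_meta: the group is the entry's parent directory
    # relative to the grouping base; top-level entries get an empty group.
    group = pathlib.PurePath(entry_path).relative_to(grouping_base).parent
    return '' if group.name == '' else str(group)

print(group_for('/store/site/login/password.gpg', '/store/site'))  # login
print(group_for('/store/site/login/password.gpg', '/store'))       # site/login
print(group_for('/store/password.gpg', '/store'))                  # (empty group)
```

This matches the README's Grouping examples: the same password file lands in group `login` or `site/login` depending on the base used.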