diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..892bee3 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +pass.csv diff --git a/README.md b/README.md index 1289194..43eed17 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,72 @@ # pass2csv -Needs [python-gnupg](https://pypi.python.org/pypi/python-gnupg). Run with path -to password store as argument. +Needs [python-gnupg](https://pypi.python.org/pypi/python-gnupg) and python3. +Run with path to password store as argument: -The csv is written to `pass.csv`. The format for the KeePass Generic CSV -Importer is: +``` +python3 -m pip install --user python-gnupg +python3 pass2csv.py ~/.password-store +``` -`Group(/),Title,Password,Notes` +There are two ways to export CSV data: -Where 'Password' is the first line of the entry in `pass` and 'Notes' are all -subsequent lines. '\\' should not be interpreted as an escape character. +1. The format for the KeePass Generic CSV Importer: + + Group(/),Title,Password,Notes + + Where 'Password' is the first line of the entry in `pass` and 'Notes' are all + subsequent lines. '\\' should not be interpreted as an escape character. + + To enable, set `KPX_FORMAT=False` in `pass2csv.py` + +2. The format for the KeePassXC Importer: + + Group(/),Title,Login,Password,URL,Notes + + Where 'Password' is the first line of the entry in `pass`, 'User' is configured + with `LOGIN_FIELDS`, URL is extracted if `GET_URL` is set, and 'Notes' contains + any other fields that do not match `EXCLUDE_ROWS`. + + To enable, set `KPX_FORMAT=True` and configure the variables mentioned above in + `pass2csv.py`. + + 'User' field is chosen by searching for the first field with a name in + LOGIN_FIELDS. Once the field is found, the login is set and the field is + removed from notes. 
+ +### Example KeePassXC Import +- Variable definitions (`pass2csv.py`) + + KPX_FORMAT=True + + LOGIN_FIELDS=['username', 'login', 'email'] + GET_URL=True + EXCLUDE_ROWS=['^---$'] + +- Password entry (`sites/example`) + + password123 + --- + username: user_name + email: user@example.com + url: example.com + some_note + +- Output CSV row (formatted) + + sites, example, user_name, password123, example.com, "email: user@example.com\nsome_note" + +- `user_name` was chosen because `username` was the first filled entry in + `LOGIN_FIELDS`. +- Both login and URL fields were excluded from the notes field because they were used +in another field. +- `---` was not included in the notes field because it was matched in `EXCLUDE_ROWS`. + +### Example KeePass Generic CSV Importer +- Variable definitions (`pass2csv.py`) + + KPX_FORMAT=False + +- Password entry: Same as above +- Output CSV row (formatted) + + sites, example, password123, "---\nusername: user_name\nemail: user@example.com\nurl: example.com\nsome_note" diff --git a/pass2csv.py b/pass2csv.py old mode 100644 new mode 100755 index dc5c0a0..d68b538 --- a/pass2csv.py +++ b/pass2csv.py @@ -1,8 +1,27 @@ +#!/usr/bin/env python3 import csv import os import sys import gnupg +import re +import logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Set to True to allow for alternate password csv to be created +# See README for differences +KPX_FORMAT=True + +if KPX_FORMAT: + # A list of possible fields (in order) that could be converted to login fields + LOGIN_FIELDS=['login', 'user', 'username', 'email'] + # Set to True to extract url fields + GET_URL=True + # A regular expression list of lines that should be excluded from the notes field + EXCLUDE_ROWS=['^---$', '^autotype ?: ?'] + +logger.info("Using KPX format: %s", KPX_FORMAT) def traverse(path): for root, dirs, files in os.walk(path): @@ -11,14 +30,68 @@ def traverse(path): for name in files: yield os.path.join(root, name) +def 
def getMetadata(notes_raw, login_fields=None, get_url=None, exclude_rows=None):
    """Split the notes of a password entry into login, URL and leftover notes.

    Each line of ``notes_raw`` is inspected in turn:

    * lines matching any pattern in ``exclude_rows`` are dropped;
    * lines of the form ``<login field>: value`` set the login and are
      dropped from the notes (if several match, the last one wins);
    * when ``get_url`` is true, lines of the form ``url: value`` set the URL
      and are dropped from the notes;
    * every other line is kept as a note.

    The login field is the first name in ``login_fields`` that occurs as a
    field name (the text before the first colon) on any line.  Field names
    are compared case-insensitively.

    Args:
        notes_raw: Everything in the entry after the password line.
        login_fields: Candidate login field names, in priority order.
            Defaults to the module-level ``LOGIN_FIELDS``.
        get_url: Whether to extract ``url:`` lines.  Defaults to the
            module-level ``GET_URL``.
        exclude_rows: Regular expressions for lines to discard.  Defaults to
            the module-level ``EXCLUDE_ROWS``.

    Returns:
        A ``(user, url, notes)`` tuple of strings; ``user`` and ``url`` are
        empty when nothing was extracted, and ``notes`` is the remaining
        lines joined by newlines with surrounding whitespace stripped.
    """
    # Fall back to the module configuration only for arguments not supplied.
    if login_fields is None:
        login_fields = LOGIN_FIELDS
    if get_url is None:
        get_url = GET_URL
    if exclude_rows is None:
        exclude_rows = EXCLUDE_ROWS

    lines = notes_raw.split('\n')

    # Collect the field names present in the notes.  The name is matched
    # non-greedily so it stops at the FIRST colon: a value that itself
    # contains a colon (e.g. "url: https://...") must not leak into the name,
    # and optional whitespace before the colon must not be part of it.
    field_name_re = re.compile(r'^(.*?) ?:')
    present = set()
    for line in lines:
        found = field_name_re.match(line)
        if found:
            present.add(found.group(1).lower())

    # login_fields is ordered by priority, so the first hit is the one to use.
    login_field = next(
        (field for field in login_fields if field.lower() in present), None)

    # Compile every pattern once instead of once per line.
    exclude_res = [re.compile(pattern, re.I) for pattern in exclude_rows]
    login_re = None
    if login_field:
        # Escape the name so it is matched literally, not as a regex.
        login_re = re.compile(
            '^' + re.escape(login_field) + ' ?: ?(.*)$', re.I)
    url_re = re.compile(r'^url ?: ?(.*)$', re.I) if get_url else None

    user = ''
    url = ''
    notes = []
    for line in lines:
        # Excluded lines never reach the login/URL checks or the notes.
        if any(pattern.search(line) for pattern in exclude_res):
            continue

        if login_re is not None:
            user_match = login_re.search(line)
            if user_match:
                user = user_match.group(1)
                # Consumed as the login; keep it out of the notes.
                continue

        if url_re is not None:
            url_match = url_re.search(line)
            if url_match:
                url = url_match.group(1)
                # Consumed as the URL; keep it out of the notes.
                continue

        notes.append(line)

    return (user, url, '\n'.join(notes).strip())


def parse(basepath, path, data):
    """Build one CSV row from a decrypted password entry.

    Args:
        basepath: Root of the password store; the group is the directory of
            ``path`` relative to this root.
        path: Path of the entry file; its base name without extension is
            used as the title.
        data: Decrypted entry text.  The first line is the password and any
            following lines are the notes.

    Returns:
        ``[group, name, user, password, url, notes]`` when ``KPX_FORMAT`` is
        true, otherwise ``[group, name, password, notes]``.
    """
    name = os.path.splitext(os.path.basename(path))[0]
    group = os.path.dirname(os.path.relpath(path, basepath))
    # partition() yields empty notes when the entry holds only a password.
    password, _, notes = data.partition('\n')
    logger.info("Processing %s", name)
    if KPX_FORMAT:
        # Extended format: pull the login and URL out of the notes.
        user, url, notes = getMetadata(notes)
        return [group, name, user, password, url, notes]
    # Generic format: the notes are passed through untouched.
    return [group, name, password, notes]