Merge pull request #4 from stonewareslord/master

Add support for KeePassXC import; Many misc improvements
2018-08-16 11:31:42 +02:00 · 2018-08-16 11:31:42 +02:00 · 6a6c49fdc8
commit 6a6c49fdc8
parent ccf3ece8da 1ab2a22301
3 changed files with 144 additions and 9 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+pass.csv
--- a/README.md
+++ b/README.md
@ -1,11 +1,72 @@
 # pass2csv
-Needs [python-gnupg](https://pypi.python.org/pypi/python-gnupg). Run with path
-to password store as argument.
+Needs [python-gnupg](https://pypi.python.org/pypi/python-gnupg) and python3.
+Run with path to password store as argument:

-The csv is written to `pass.csv`. The format for the KeePass Generic CSV
-Importer is:
+```
+python3 -m pip install --user python-gnupg
+python3 pass2csv.py ~/.password-store
+```

-`Group(/),Title,Password,Notes`
+There are two ways to export CSV data:

-Where 'Password' is the first line of the entry in `pass` and 'Notes' are all
-subsequent lines. '\\' should not be interpreted as an escape character.
+1.  The format for the KeePass Generic CSV Importer:
+
+        Group(/),Title,Password,Notes
+
+    Where 'Password' is the first line of the entry in `pass` and 'Notes' are all
+    subsequent lines. '\\' should not be interpreted as an escape character.
+
+    To enable, set `KPX_FORMAT=False` in `pass2csv.py`
+
+2.  The format for the KeePassXC Importer:
+
+        Group(/),Title,Login,Password,URL,Notes
+
+    Where 'Password' is the first line of the entry in `pass`, 'Login' is selected
+    using `LOGIN_FIELDS`, 'URL' is extracted if `GET_URL` is set, and 'Notes' contains
+    any other lines that do not match `EXCLUDE_ROWS`.
+
+    To enable, set `KPX_FORMAT=True` and configure the variables mentioned above in
+    `pass2csv.py`.
+
+    The 'Login' value is chosen by taking the first name in `LOGIN_FIELDS` (in list
+    order) that appears as a field in the entry. Once the field is found, the login
+    is set and the field is removed from the notes.
+
+### Example KeePassXC Import
+- Variable definitions (`pass2csv.py`)
+
+        KPX_FORMAT=True
+
+        LOGIN_FIELDS=['username', 'login', 'email']
+        GET_URL=True
+        EXCLUDE_ROWS=['^---$']
+
+- Password entry (`sites/example`)
+
+        password123
+        ---
+        username: user_name
+        email: user@example.com
+        url: example.com
+        some_note
+
+- Output CSV row (formatted)
+
+        sites, example, user_name, password123, example.com, "email: user@example.com\nsome_note"
+
+- `user_name` was chosen because `username` is the first name in `LOGIN_FIELDS`
+  that is present in the entry.
+- Both the login and URL fields were excluded from the notes field because they
+  were used in another field.
+- `---` was not included in the notes field because it was matched in `EXCLUDE_ROWS`.
+
+### Example KeePass Generic CSV Importer
+- Variable definitions (`pass2csv.py`)
+
+        KPX_FORMAT=False
+
+- Password entry: Same as above
+- Output CSV row (formatted)
+
+        sites, example, password123, "---\nusername: user_name\nemail: user@example.com\nurl: example.com\nsome_note"
--- a/pass2csv.py
+++ b/pass2csv.py
@ -1,8 +1,27 @@
+#!/usr/bin/env python3
 import csv
 import os
 import sys
 import gnupg
+import re
+import logging

+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Set to True to allow for alternate password csv to be created
+# See README for differences
+KPX_FORMAT=True
+
+if KPX_FORMAT:
+    # A list of possible fields (in order) that could be converted to login fields
+    LOGIN_FIELDS=['login', 'user', 'username', 'email']
+    # Set to True to extract url fields
+    GET_URL=True
+    # A regular expression list of lines that should be excluded from the notes field
+    EXCLUDE_ROWS=['^---$', '^autotype ?: ?']
+
+logger.info("Using KPX format: %s", KPX_FORMAT)

 def traverse(path):
    for root, dirs, files in os.walk(path):
@ -11,14 +30,68 @@ def traverse(path):
        for name in files:
            yield os.path.join(root, name)

+def getMetadata(notes_raw):
+    lines = notes_raw.split('\n')
+
+    # A list of lines to keep as notes (will be joined by newline)
+    notes = []
+    # The extracted user field
+    user = ''
+    # The extracted URL field
+    url = ''
+
+    # This will extract each field name (for example, if a line in notes was `user: user1`, fields should contain 'user')
+    all_fields = set()
+    for line in lines:
+        field_search = re.search('^(.*) ?: ?.*$', line, re.I)
+        if field_search:
+            all_fields.add(field_search.group(1))
+
+    # Check if any of the fields match the login names
+    login_fields = [field for field in LOGIN_FIELDS if field in all_fields]
+    # Get the field to use for the login. Since LOGIN_FIELDS is in order, the 0th element will contain the first match
+    login_field = None if not login_fields else login_fields[0]
+
+    # Iterate through the file again to build the return array
+    for line in lines:
+
+        # If any of the exclusion patterns match, ignore the line
+        if len([pattern for pattern in EXCLUDE_ROWS if re.search(pattern, line, re.I)]) != 0:
+            continue
+
+        if login_field:
+            user_search = re.search('^' + login_field + ' ?: ?(.*)$', line, re.I)
+            if user_search:
+                user = user_search.group(1)
+                # The user was matched, don't add it to notes
+                continue
+
+        if GET_URL:
+            url_search = re.search('^url ?: ?(.*)$', line, re.I)
+            if url_search:
+                url = url_search.group(1)
+                # The url was matched, don't add it to notes
+                continue
+
+        notes.append(line)
+
+    return (user, url, '\n'.join(notes).strip())

 def parse(basepath, path, data):
    name = os.path.splitext(os.path.basename(path))[0]
    group = os.path.dirname(os.path.os.path.relpath(path, basepath))
    split_data = data.split('\n', maxsplit=1)
    password = split_data[0]
-    notes = split_data[1]
-    return [group, name, password, notes]
+    # Perform if/else in case there are no notes for a field
+    notes = split_data[1] if len(split_data) > 1 else ""
+    logger.info("Processing %s" % (name,))
+    if KPX_FORMAT:
+        # We are using the advanced format; try extracting user and url
+        user, url, notes = getMetadata(notes)
+        return [group, name, user, password, url, notes]
+    else:
+        # We are not using KPX format; just use notes
+        return [group, name, password, notes]


 def main(path):