Extracting metadata and attachments from Lotus Notes

This post follows on from my earlier post about extracting data from Lotus Notes using Python.

I have modified the Python code to look up certain fields in Lotus Domino, take the metadata from the Lotus document, apply formatting, and write the result out in Rich Text Format (RTF).

The script also writes the names of any attachments embedded within the body of the document. When you run both scripts, you will end up with RTF documents containing the metadata, alongside their corresponding attachments.

I am using a Python module called PyRTF to format the data into a document so that it is easier to read. PyRTF can also handle image extraction, but I have not completed that part yet.

The Python script for extracting data from Lotus Domino to RTF is as follows:
__author__ = 'Mark Jones'
import sys
import re
import win32com.client
import datetime
import textwrap
import os

# Add the parent directory to the import path before importing PyRTF
# (presumably PyRTF lives one level above this script -- confirm for your layout).
sys.path.append('../')

from PyRTF import *

#Define the session
# NOTE(review): an empty server name presumably selects the local Notes client -- confirm.
notesServer = ""
notesFile = "Lotus_db.nsf"
# NOTE(review): placeholder password hard-coded in source; do not commit a real one.
notesPass = "LotusPassword"
#Connect to notes database on server
notesSession = win32com.client.Dispatch('Lotus.NotesSession')
notesSession.Initialize(notesPass)
notesDatabase = notesSession.GetDatabase(notesServer, notesFile)

#Build the view and get first document in view
notesView = notesDatabase.GetView('All Documents')
notesDoc = notesView.GetFirstDocument()

#Function that iterates through documents in view
def makeDocumentGenerator(folderName):
    """Yield each document of the view/folder called ``folderName``.

    The view is looked up on the module-level ``notesDatabase``.

    Raises:
        Exception: if ``GetView`` returns a false value for ``folderName``.
            Because this is a generator, the lookup (and therefore the
            exception) happens on the first iteration, not at call time.
    """
    # Get folder
    folder = notesDatabase.GetView(folderName)
    if not folder:
        raise Exception('Folder "%s" not found' % folderName)
    # Walk the view; the loop stops as soon as GetFirstDocument /
    # GetNextDocument hands back a false value.
    document = folder.GetFirstDocument()
    while document:
        yield document
        document = folder.GetNextDocument(document)

#Creates the folder path for files
def createPath(path):
    """Make sure the directory ``path`` exists, creating missing parents as well.

    Does nothing when ``path`` is already a directory.
    """
    if not os.path.isdir(path):
        # makedirs rather than mkdir: a missing parent directory (for example
        # the export root) must not abort the run with OSError.
        os.makedirs(path)

#This creates the rich text file using pyrtf
def example1(subject, categories, author, datecreated, docID, body, attachment):
    """Build and return a PyRTF ``Document`` describing one Lotus Notes document.

    Args:
        subject: text shown as the "Subject:" heading.
        categories: text shown on the "Category:" line.
        author: text shown on the "Author:" line.
        datecreated: text shown on the "Date Created:" line.
        docID: Notes document ID, written into the page header.
        body: document content, written after "Content:".
        attachment: attachment name (or None); rendered with ``str()``.

    ``subject``, ``categories``, ``datecreated`` and ``author`` are joined
    with ``+`` and therefore must already be strings.

    Returns:
        The populated PyRTF ``Document`` (a single section with header,
        body paragraphs and a page-number footer).
    """
    doc = Document()
    ss = doc.StyleSheet
    section = Section()

    # Page header: extraction date, source document ID and a ruler line.
    section.Header.append('Extract from Lotus Domino - %s' % datetime.date.today())
    section.Header.append('Lotus Notes document ID: %s' % docID)
    section.Header.append('___________________________________________')
    doc.Sections.append(section)

    # Metadata lines.
    sub_p = Paragraph(ss.ParagraphStyles.Heading1)
    sub_p.append('Subject: ' + subject)
    section.append(sub_p)
    cat_p = Paragraph(ss.ParagraphStyles.Normal)
    cat_p.append('Category: ' + categories)
    section.append(cat_p)
    section.append(Paragraph('Date Created: ' + datecreated))
    section.append(Paragraph('Author: ' + author))

    # The empty strings are appended as spacers between the parts.
    section.append('')
    section.append(Paragraph('Attachments: %s' % str(attachment)))
    section.append('')
    section.append('')
    section.append(Paragraph('Content: %s' % body))

    # Footer: "Page X of Y".
    p = Paragraph()
    p.append('Page ', PAGE_NUMBER, ' of ', SECTION_PAGES)
    section.Footer.append(p)
    return doc

#Saves the document
def openfile(subject, categories):
    r"""Open the RTF output file for a document and return the open file object.

    The file is ``C:\TMP1\<categories>\<subject>.rtf``, where both name parts
    have the characters ``/ \ : * ? " < > |`` removed first. The category
    directory is created if needed and becomes the current working directory.

    Args:
        subject: document subject, used as the file name.
        categories: document category, used as the directory name.

    Returns:
        A file object opened in ``'w'`` mode. The caller is responsible for
        closing it.

    NOTE(review): documents sharing the same subject and category map to the
    same file name, so a later one overwrites an earlier one.
    """
    # One pattern for both parts, so they cannot drift apart.
    illegal_chars = r'[/\\:*?"<>|]'
    processed_subject = str(re.sub(illegal_chars, '', subject))
    processed_categories = str(re.sub(illegal_chars, '', categories))

    filePath = 'C:\\TMP1\\%s' % processed_categories
    createPath(filePath)
    os.chdir(filePath)
    # open() instead of the Python-2-only file() builtin; same result.
    return open('C:\\TMP1\\%s\\%s.rtf' % (processed_categories, processed_subject), 'w')

#Builds the document
def documentmaker(subject, categories, author, datecreated, docID, body, attachment):
    """Render one Lotus Notes document to its RTF file on disk.

    Builds the PyRTF document with ``example1``, opens the target file with
    ``openfile(subject, categories)`` and writes it with a PyRTF ``Renderer``.
    All arguments are passed through to ``example1`` unchanged.
    """
    DR = Renderer()
    doc1 = example1(subject, categories, author, datecreated, docID, body, attachment)
    output = openfile(subject, categories)
    try:
        DR.Write(doc1, output)
    finally:
        # Close explicitly so the RTF is flushed to disk and the handle is
        # released even if rendering fails.
        output.close()
    print("Document created: %s" % subject)

#Finds out if any attachments are attached so they can be listed in RTF
def fileNameTitle(document):
    """Return the name of the first attachment on ``document``, or None.

    An attachment is recognised as an item whose ``Name`` is ``'$FILE'``; the
    first entry of that item's ``Values`` is printed and returned.

    Returns:
        The attachment name, or ``None`` when the document has no ``$FILE``
        item (previously the name could be left unbound in that case).
    """
    # Fetch the item collection once instead of once per loop iteration.
    for item in document.Items:
        if item.Name == '$FILE':
            fileName = item.Values[0]
            print(fileName)
            return fileName
    return None

# NOTE: this repeats the createPath defined earlier in this script; being the
# later definition, it is the one in effect when main() runs.
def createPath(path):
    """Make sure the directory ``path`` exists, creating missing parents as well.

    Does nothing when ``path`` is already a directory.
    """
    if not os.path.isdir(path):
        # makedirs rather than mkdir: a missing parent directory (for example
        # the export root) must not abort the run with OSError.
        os.makedirs(path)

#Main program which gets the values from Lotus Notes and passes them to pyRTF for creation
def _firstValueUtf8(document, itemName):
    """Return the first value of item ``itemName`` as a UTF-8 encoded string."""
    return str(document.GetItemValue(itemName)[0].encode('utf-8'))


def main():
    """Export every document of the 'All Documents' view to an RTF file.

    For each document the Subject, Categories, DocumentAuthors, Date and body
    items plus the UniversalID are read, the attachment name is looked up
    with ``fileNameTitle`` and everything is handed to ``documentmaker``.
    """
    for document in makeDocumentGenerator('All Documents'):
        # Every text field is encoded to UTF-8 before str(): calling str()
        # on un-encoded non-ASCII text raises UnicodeEncodeError in Python 2.
        subject = _firstValueUtf8(document, 'Subject').strip()
        categories = _firstValueUtf8(document, 'Categories').strip()
        author = _firstValueUtf8(document, 'DocumentAuthors').strip()
        datecreated = str(document.GetItemValue('Date')[0])
        # The document ID is passed on for the RTF page header.
        docID = str(document.UniversalID.strip())
        body = _firstValueUtf8(document, 'body')
        attachment = fileNameTitle(document)
        documentmaker(subject, categories, author, datecreated, docID, body, attachment)

main()


 

This is the Python code for extracting the attachments to the same folder as the metadata:
__author__ = 'mark.jones'
# Import system modules
import os
import tempfile
import sys
import csv
import re
import win32com.client

#Define the session
# NOTE(review): an empty server name presumably selects the local Notes client -- confirm.
notesServer = ""
notesFile = "Lotus_DB.nsf"
# NOTE(review): placeholder password hard-coded in source; do not commit a real one.
notesPass = "LotusPassword"
#Connect to notes database on server
notesSession = win32com.client.Dispatch('Lotus.NotesSession')
notesSession.Initialize(notesPass)
notesDatabase = notesSession.GetDatabase(notesServer, notesFile)

#Build the view and get first document in view
notesView = notesDatabase.GetView('All Documents')
notesDoc = notesView.GetFirstDocument()

#Function that iterates through documents in view
def makeDocumentGenerator(folderName):
    """Yield each document of the view/folder called ``folderName``.

    The view is looked up on the module-level ``notesDatabase``.

    Raises:
        Exception: if ``GetView`` returns a false value for ``folderName``.
            Because this is a generator, the lookup (and therefore the
            exception) happens on the first iteration, not at call time.
    """
    # Get folder
    folder = notesDatabase.GetView(folderName)
    if not folder:
        raise Exception('Folder "%s" not found' % folderName)
    # Walk the view; the loop stops as soon as GetFirstDocument /
    # GetNextDocument hands back a false value.
    document = folder.GetFirstDocument()
    while document:
        yield document
        document = folder.GetNextDocument(document)

#Function to create folders
def createPath(path):
    """Make sure the directory ``path`` exists, creating missing parents as well.

    Does nothing when ``path`` is already a directory.
    """
    if not os.path.isdir(path):
        # makedirs rather than mkdir: a missing parent directory (for example
        # the export root) must not abort the run with OSError.
        os.makedirs(path)

#Main program which gathers attachment data and extracts to file
def main():
    r"""Extract every attachment of every document in the 'All Documents' view.

    Each ``$FILE`` item of a document names one attachment. The attachment is
    extracted, under its own file name, into ``C:\TMP1\<Categories>``, which
    is created if necessary and made the current working directory.

    A failure while processing a document is reported on stdout and the run
    continues with the next document (best effort).
    """
    for document in makeDocumentGenerator('All Documents'):
        try:
            # Fetch the item collection once per document, not once per index.
            for item in document.Items:
                if item.Name != '$FILE':
                    continue
                categories = document.GetItemValue('Categories')[0].strip()
                # Strip the characters Windows forbids in names, exactly as the
                # RTF export script does, so both scripts use the same folder.
                categories = re.sub(r'[/\\:*?"<>|]', '', categories)
                fileName = item.Values[0].encode('utf-8').strip()
                attachment = document.GetAttachment(fileName) #Get the attachment from the document

                print("NEW File Name: " + fileName) #Validation print
                # No str() around the path: it raised UnicodeEncodeError for
                # non-ASCII category names under Python 2.
                filePath = "C:\\TMP1\\" + categories
                createPath(filePath)
                os.chdir(filePath)
                # Relative name, so the file lands in filePath (the cwd set above).
                attachment.ExtractFile(fileName)
        except Exception as exc:
            # Report what went wrong instead of a bare "Error"; catching
            # Exception (not everything) lets Ctrl-C still stop the run.
            print("Error: %s" % exc)

main()