Vivek Pandey
2009-04-21 05:57:37 UTC
We have a two node Jackrabbit cluster with a db based repository and
journal. We are seeing some performance/data integrity issues when we run
multiple threads on same/different cluster nodes.
1) The time to add a document increases rapidly when multiple threads
try to add documents concurrently. The average time taken to add a document with two
concurrent threads is nearly twice the time taken by a single thread.
2) We have seen issues with the GLOBAL_REVISIONS table going out of sync with
all the local revisions in the LOCAL_REVISIONS table. This causes further
inserts into the JOURNAL table to fail. For example, the local revision of node N1 is A,
that of node N2 is (A-1), the last entry in JOURNAL has revision_id A, and the global
revision is also (A-1). This is a problematic scenario, as further inserts
into JOURNAL fail.
Please advise if we are doing something wrong here. Our repository.xml is pasted
below.
<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE Repository PUBLIC "-//The Apache Software Foundation//DTD
Jackrabbit 1.2//EN"
"http://jackrabbit.apache.org/dtd/repository-1.2.dtd">
<!-- Jackrabbit repository configuration as posted. The DataStore, the cluster
     journal and both persistence managers all resolve the same JNDI data source
     ("jdbc/dbjournal") and all use the "JR_" table/schema-object prefix. -->
<Repository>
<DataStore class="org.apache.jackrabbit.core.data.db.DbDataStore">
<param name="driver" value="javax.naming.InitialContext"/>
<param name="url" value="jdbc/dbjournal"/>
<param name="databaseType" value="oracle"/>
<param name="minRecordLength" value="1"/>
<param name="copyWhenReading" value="true"/>
<param name="tablePrefix" value="JR_"/>
</DataStore>
<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
<param name="path" value="${rep.home}/repository"/>
</FileSystem>
<Security appName="Jackrabbit">
<AccessManager
class="org.apache.jackrabbit.core.security.SimpleAccessManager">
</AccessManager>
<LoginModule
class="org.apache.jackrabbit.core.security.SimpleLoginModule">
<param name="anonymousId" value="anonymous"/>
</LoginModule>
</Security>
<!-- NOTE(review): syncDelay is presumably expressed in milliseconds, which would
     make this a 30 minute interval between journal synchronizations; confirm
     against the Jackrabbit clustering documentation. -->
<Cluster id="node1" syncDelay="1800000">
<Journal
class="org.apache.jackrabbit.core.journal.OracleDatabaseJournal">
<param name="schemaObjectPrefix" value="JR_"/>
<param name="driver" value="javax.naming.InitialContext" />
<param name="url" value="jdbc/dbjournal" />
</Journal>
</Cluster>
<!-- NOTE(review): defaultWorkspace is "default" while the workspace template
     below is named "TestWorkSpace"; confirm that this mismatch is intended. -->
<Workspaces rootPath="${rep.home}/workspaces" defaultWorkspace="default"/>
<Workspace name="TestWorkSpace">
<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
<param name="path" value="${wsp.home}"/>
</FileSystem>
<PersistenceManager
class="org.apache.jackrabbit.core.persistence.db.OraclePersistenceManager">
<param name="driver" value="javax.naming.InitialContext" />
<param name="url" value="jdbc/dbjournal" />
<param name="schemaObjectPrefix" value="JR_"/>
<param name="tableSpace" value=""/>
<param name="externalBLOBs" value="false"/>
</PersistenceManager>
<SearchIndex
class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
<param name="path" value="${wsp.home}/index"/>
</SearchIndex>
<ISMLocking
class="org.apache.jackrabbit.core.state.FineGrainedISMLocking"></ISMLocking>
</Workspace>
<!-- The versioning store uses the same data source and the same "JR_" prefix
     as the workspace persistence manager above. -->
<Versioning rootPath="${rep.home}/version">
<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
<param name="path" value="${rep.home}/version"/>
</FileSystem>
<PersistenceManager
class="org.apache.jackrabbit.core.persistence.db.OraclePersistenceManager">
<param name="driver" value="javax.naming.InitialContext" />
<param name="url" value="jdbc/dbjournal" />
<param name="schemaObjectPrefix" value="JR_"/>
<param name="tableSpace" value=""/>
<param name="externalBLOBs" value="false"/>
</PersistenceManager>
</Versioning>
<SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
<param name="path" value="${rep.home}/repository/index"/>
</SearchIndex>
</Repository>
The code block we are using to add documents to the repository is given below:
public static String addDocument(InputStream inputStream) throws
Exception {
Session session = null;
try {
long start = System.currentTimeMillis();
ContentRepositoryConfiguration contentRep =
ContentRepositoryConfiguration.getInstance();
session = contentRep.getSession();
logger.debug(" got the session for addDocument");
Node rootNode = session.getRootNode();
Node fileNode =
rootNode.addNode("document"+((long)(System.currentTimeMillis() *
Math.random())), ContentRepositoryConstants.PRIMARY_NODE_TYPE_FILE);
fileNode.addMixin(ContentRepositoryConstants.MIXIN_TYPE_REFERENCEABLE);
Node content =
fileNode.addNode(ContentRepositoryConstants.PROPERTY_TYPE_CONTENT,
ContentRepositoryConstants.PRIMARY_NODE_TYPE_RESOURCE);
content.setProperty(ContentRepositoryConstants.PROPERTY_TYPE_MIMETYPE, "");
content.setProperty(ContentRepositoryConstants.PROPERTY_TYPE_LAST_MODIFIED,
Calendar.getInstance());
content.setProperty(ContentRepositoryConstants.PROPERTY_TYPE_ENCODING, "");
content.setProperty(ContentRepositoryConstants.PROPERTY_TYPE_DATA,
inputStream);
session.save();
return fileNode.getUUID();
} catch (Exception ex) {
logger.info("Exception occured while adding document : " + ex);
throw ex;
}
finally {
try {
session.logout();
inputStream.close();
} catch (Exception ex) {
logger.error("Exception occured while closing
inputStream/session");
}
}
}
Thanks,
Vivek
journal. We are seeing some performance/data integrity issues when we run
multiple threads on same/different cluster nodes.
1) The time to add a document increases rapidly when multiple threads
try to add documents concurrently. The average time taken to add a document with two
concurrent threads is nearly twice the time taken by a single thread.
2) We have seen issues with the GLOBAL_REVISIONS table going out of sync with
all the local revisions in the LOCAL_REVISIONS table. This causes further
inserts into the JOURNAL table to fail. For example, the local revision of node N1 is A,
that of node N2 is (A-1), the last entry in JOURNAL has revision_id A, and the global
revision is also (A-1). This is a problematic scenario, as further inserts
into JOURNAL fail.
Please advise if we are doing something wrong here. Our repository.xml is pasted
below.
<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE Repository PUBLIC "-//The Apache Software Foundation//DTD
Jackrabbit 1.2//EN"
"http://jackrabbit.apache.org/dtd/repository-1.2.dtd">
<!-- Jackrabbit repository configuration as posted. The DataStore, the cluster
     journal and both persistence managers all resolve the same JNDI data source
     ("jdbc/dbjournal") and all use the "JR_" table/schema-object prefix. -->
<Repository>
<DataStore class="org.apache.jackrabbit.core.data.db.DbDataStore">
<param name="driver" value="javax.naming.InitialContext"/>
<param name="url" value="jdbc/dbjournal"/>
<param name="databaseType" value="oracle"/>
<param name="minRecordLength" value="1"/>
<param name="copyWhenReading" value="true"/>
<param name="tablePrefix" value="JR_"/>
</DataStore>
<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
<param name="path" value="${rep.home}/repository"/>
</FileSystem>
<Security appName="Jackrabbit">
<AccessManager
class="org.apache.jackrabbit.core.security.SimpleAccessManager">
</AccessManager>
<LoginModule
class="org.apache.jackrabbit.core.security.SimpleLoginModule">
<param name="anonymousId" value="anonymous"/>
</LoginModule>
</Security>
<!-- NOTE(review): syncDelay is presumably expressed in milliseconds, which would
     make this a 30 minute interval between journal synchronizations; confirm
     against the Jackrabbit clustering documentation. -->
<Cluster id="node1" syncDelay="1800000">
<Journal
class="org.apache.jackrabbit.core.journal.OracleDatabaseJournal">
<param name="schemaObjectPrefix" value="JR_"/>
<param name="driver" value="javax.naming.InitialContext" />
<param name="url" value="jdbc/dbjournal" />
</Journal>
</Cluster>
<!-- NOTE(review): defaultWorkspace is "default" while the workspace template
     below is named "TestWorkSpace"; confirm that this mismatch is intended. -->
<Workspaces rootPath="${rep.home}/workspaces" defaultWorkspace="default"/>
<Workspace name="TestWorkSpace">
<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
<param name="path" value="${wsp.home}"/>
</FileSystem>
<PersistenceManager
class="org.apache.jackrabbit.core.persistence.db.OraclePersistenceManager">
<param name="driver" value="javax.naming.InitialContext" />
<param name="url" value="jdbc/dbjournal" />
<param name="schemaObjectPrefix" value="JR_"/>
<param name="tableSpace" value=""/>
<param name="externalBLOBs" value="false"/>
</PersistenceManager>
<SearchIndex
class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
<param name="path" value="${wsp.home}/index"/>
</SearchIndex>
<ISMLocking
class="org.apache.jackrabbit.core.state.FineGrainedISMLocking"></ISMLocking>
</Workspace>
<!-- The versioning store uses the same data source and the same "JR_" prefix
     as the workspace persistence manager above. -->
<Versioning rootPath="${rep.home}/version">
<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
<param name="path" value="${rep.home}/version"/>
</FileSystem>
<PersistenceManager
class="org.apache.jackrabbit.core.persistence.db.OraclePersistenceManager">
<param name="driver" value="javax.naming.InitialContext" />
<param name="url" value="jdbc/dbjournal" />
<param name="schemaObjectPrefix" value="JR_"/>
<param name="tableSpace" value=""/>
<param name="externalBLOBs" value="false"/>
</PersistenceManager>
</Versioning>
<SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
<param name="path" value="${rep.home}/repository/index"/>
</SearchIndex>
</Repository>
The code block we are using to add documents to the repository is given below:
public static String addDocument(InputStream inputStream) throws
Exception {
Session session = null;
try {
long start = System.currentTimeMillis();
ContentRepositoryConfiguration contentRep =
ContentRepositoryConfiguration.getInstance();
session = contentRep.getSession();
logger.debug(" got the session for addDocument");
Node rootNode = session.getRootNode();
Node fileNode =
rootNode.addNode("document"+((long)(System.currentTimeMillis() *
Math.random())), ContentRepositoryConstants.PRIMARY_NODE_TYPE_FILE);
fileNode.addMixin(ContentRepositoryConstants.MIXIN_TYPE_REFERENCEABLE);
Node content =
fileNode.addNode(ContentRepositoryConstants.PROPERTY_TYPE_CONTENT,
ContentRepositoryConstants.PRIMARY_NODE_TYPE_RESOURCE);
content.setProperty(ContentRepositoryConstants.PROPERTY_TYPE_MIMETYPE, "");
content.setProperty(ContentRepositoryConstants.PROPERTY_TYPE_LAST_MODIFIED,
Calendar.getInstance());
content.setProperty(ContentRepositoryConstants.PROPERTY_TYPE_ENCODING, "");
content.setProperty(ContentRepositoryConstants.PROPERTY_TYPE_DATA,
inputStream);
session.save();
return fileNode.getUUID();
} catch (Exception ex) {
logger.info("Exception occured while adding document : " + ex);
throw ex;
}
finally {
try {
session.logout();
inputStream.close();
} catch (Exception ex) {
logger.error("Exception occured while closing
inputStream/session");
}
}
}
Thanks,
Vivek