Parser.java
/*
* GovWay - A customizable API Gateway
* https://govway.org
*
* Copyright (c) 2005-2024 Link.it srl (https://link.it).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 3, as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package org.openspcoop2.utils.csv;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.Properties;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.input.BOMInputStream;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.CharEncoding;
import org.openspcoop2.utils.Utilities;
import org.openspcoop2.utils.UtilsException;
import org.openspcoop2.utils.regexp.RegExpNotFoundException;
import org.openspcoop2.utils.regexp.RegularExpressionEngine;
/**
* Parser
*
* @author Andrea Poli (apoli@link.it)
* @author $Author$
* @version $Rev$, $Date$
*/
public class Parser {
// Property prefixes read by init(). Keys are looked up by <name> on the result of
// Utilities.readProperties(prefix, ...) - presumably the prefix is stripped there (TODO confirm).

// "mapping.field.<name>": declares a mapped field; the trimmed value is the CSV column name,
// the CSV column position (when positionMapping is true) or the constant value.
private static final String PARSER_NAME_PREFIX_= "mapping.field.";
// "mapping.regexp.<name>": first regular expression applied to the CSV value;
// further patterns are read from "<name>.1", "<name>.2", ... until a gap is found.
private static final String PARSER_REGEXP_PREFIX_= "mapping.regexp.";
// Suffix appended to "mapping.regexp.<name>": its value must equal the toString() of a ParserRegexpNotFound constant.
private static final String PARSER_REGEXP_SUFFIX_NOT_FOUND= ".notFound";
// "mapping.default.<name>": value used when the CSV value (after regexp handling) is null.
private static final String PARSER_DEFAULT_PREFIX_= "mapping.default.";
// "mapping.constant.<name>": true/false; when true the field value is taken as a constant.
private static final String PARSER_CONSTANT_PREFIX_= "mapping.constant.";
// "mapping.required.<name>": true/false; forwarded to ParserMappingRecord.setRequired for non-constant fields.
private static final String PARSER_REQUIRED_PREFIX_= "mapping.required.";
// Mapping rules applied, in list order, to every CSV record.
private List<ParserMappingRecord> mapping;
/**
 * Creates a parser from an already built list of mapping rules.
 * Every rule must have a name and at least one source among constant value,
 * CSV position and CSV column name. The list is stored as received (no copy).
 *
 * @param mapping mapping rules, must be non-null and non-empty
 * @throws UtilsException if the list is null/empty or a rule is incomplete
 */
public Parser(List<ParserMappingRecord> mapping) throws UtilsException {
	if(mapping==null || mapping.isEmpty()){
		throw new UtilsException("Map is null");
	}
	int total = mapping.size();
	for (int idx = 0; idx < total; idx++) {
		ParserMappingRecord rule = mapping.get(idx);
		if(rule.getName()==null){
			throw new UtilsException("RecordMapping["+idx+"] without name");
		}
		// at least one source of the value must be configured
		boolean hasSource = rule.getConstantValue()!=null
				|| rule.getCsvPosition()!=null
				|| rule.getCsvColumnName()!=null;
		if(!hasSource){
			throw new UtilsException("RecordMapping["+idx+"] name["+rule.getName()+"] without almost one required field: constantValue, csvPosition, csvColumnName");
		}
	}
	this.mapping = mapping;
}
/**
 * Creates a parser reading the mapping rules from a stream in
 * {@link Properties} format.
 *
 * @param is stream containing the mapping properties
 * @param positionMapping true if field values are CSV column positions, false if they are column names
 * @throws UtilsException if the stream cannot be loaded or the mapping is invalid
 */
public Parser(InputStream is,boolean positionMapping) throws UtilsException {
	Properties loaded = new Properties();
	try{
		loaded.load(is);
	}catch(Exception e){
		throw new UtilsException(e.getMessage(),e);
	}
	init(loaded,positionMapping);
}
/**
 * Creates a parser reading the mapping rules from the given properties.
 *
 * @param properties mapping properties
 * @param positionMapping true if field values are CSV column positions, false if they are column names
 * @throws UtilsException if the mapping is invalid
 */
public Parser(Properties properties,boolean positionMapping) throws UtilsException {
	init(properties, positionMapping);
}
/**
 * Builds the list of mapping rules from the configuration properties.
 * <p>
 * One {@link ParserMappingRecord} is created for every <code>mapping.field.&lt;name&gt;</code>
 * property. Depending on the companion properties the record is a constant
 * (<code>mapping.constant.&lt;name&gt;=true</code>), a positional column
 * (when <code>positionMapping</code> is true) or a named column. Regular expressions,
 * not-found behaviour, default value and required flag are attached to non-constant records only.
 *
 * @param properties configuration properties
 * @param positionMapping true if field values are CSV column positions, false if they are column names
 * @throws UtilsException if no field is declared or a property has an invalid value
 */
private void init(Properties properties,boolean positionMapping) throws UtilsException{
	Properties names = Utilities.readProperties(PARSER_NAME_PREFIX_, properties);
	Properties regexps = Utilities.readProperties(PARSER_REGEXP_PREFIX_, properties);
	Properties constants = Utilities.readProperties(PARSER_CONSTANT_PREFIX_, properties);
	Properties required = Utilities.readProperties(PARSER_REQUIRED_PREFIX_, properties);
	Properties defaults = Utilities.readProperties(PARSER_DEFAULT_PREFIX_, properties);
	if(names.size()<=0){
		throw new UtilsException("No mapping exists");
	}
	this.mapping = new ArrayList<ParserMappingRecord>();
	Enumeration<?> enNames = names.keys();
	while (enNames.hasMoreElements()) {
		String key = (String) enNames.nextElement();
		String value = names.getProperty(key);
		if(value==null){
			throw new UtilsException("Property ["+PARSER_NAME_PREFIX_+key+"] without value");
		}
		value = value.trim();
		ParserMappingRecord recordMapping = null;
		if(constants.containsKey(key)){
			// The flag is read from 'constants': the previous code validated it against
			// 'required', failing with a NullPointerException when no required property existed.
			String constantFlag = constants.getProperty(key).trim();
			if("true".equalsIgnoreCase(constantFlag)){
				// the field is a constant
				recordMapping = ParserMappingRecord.newCsvConstantRecord(key, value);
			}
			else if(!"false".equalsIgnoreCase(constantFlag)){
				throw new UtilsException("Property ["+PARSER_CONSTANT_PREFIX_+key+"] with wrong value (expected true/false): "+constantFlag);
			}
			// "false": handled below as a normal field
		}
		if(recordMapping==null){
			// not a constant
			String [] regExpr = null;
			ParserRegexpNotFound regexpNotFoundBehaviour = null;
			if(regexps.containsKey(key) && regexps.getProperty(key)!=null){
				List<String> r = new ArrayList<>();
				r.add(regexps.getProperty(key).trim());
				// additional patterns: <name>.1, <name>.2, ... until the first missing index
				int index = 1;
				while(regexps.containsKey(key+"."+index) && regexps.getProperty(key+"."+index)!=null){
					r.add(regexps.getProperty(key+"."+index).trim());
					index++;
				}
				regExpr = r.toArray(new String[1]);
				String notFoundKey = key+PARSER_REGEXP_SUFFIX_NOT_FOUND;
				if(regexps.containsKey(notFoundKey) && regexps.getProperty(notFoundKey)!=null){
					String tmp = regexps.getProperty(notFoundKey).trim();
					ParserRegexpNotFound [] p = ParserRegexpNotFound.values();
					for (int i = 0; i < p.length; i++) {
						if(p[i].toString().equals(tmp)){
							regexpNotFoundBehaviour = p[i];
							break;
						}
					}
					if(regexpNotFoundBehaviour==null){
						throw new UtilsException("Property ["+PARSER_REGEXP_PREFIX_+key+PARSER_REGEXP_SUFFIX_NOT_FOUND+"] with wrong value (expected: "+ArrayUtils.toString(p)+"): "+tmp);
					}
				}
			}
			String defaultValue = null;
			if(defaults.containsKey(key) && defaults.getProperty(key)!=null){
				defaultValue = defaults.getProperty(key).trim();
			}
			if(positionMapping){
				try{
					int intValue = Integer.parseInt(value);
					if(intValue<0){
						throw new Exception("Negative Number");
					}
					recordMapping = ParserMappingRecord.newCsvColumnPositionRecord(key, intValue, defaultValue, regexpNotFoundBehaviour, regExpr);
				}catch(Exception e){
					throw new UtilsException("Property ["+PARSER_NAME_PREFIX_+key+"] with wrong value (expected positive number): "+e.getMessage(),e);
				}
			}
			else{
				recordMapping = ParserMappingRecord.newCsvColumnNameRecord(key, value, defaultValue, regexpNotFoundBehaviour, regExpr);
			}
			if(required.containsKey(key)){
				String requiredFlag = required.getProperty(key).trim();
				if("true".equalsIgnoreCase(requiredFlag)){
					recordMapping.setRequired(true);
				}
				else if("false".equalsIgnoreCase(requiredFlag)){
					recordMapping.setRequired(false);
				}
				else{
					throw new UtilsException("Property ["+PARSER_REQUIRED_PREFIX_+key+"] with wrong value (expected true/false): "+requiredFlag);
				}
			}
		}
		this.mapping.add(recordMapping);
	}
}
/**
 * Parses CSV content supplied as a string.
 *
 * @param format CSV format
 * @param csvContent the CSV text
 * @return the parsed records
 * @throws UtilsException if parsing or mapping fails
 */
public ParserResult parseCsvFile(Format format, String csvContent) throws UtilsException{
	// StringReader.close() declares no checked exception, so no catch is required
	try (StringReader csvReader = new StringReader(csvContent)) {
		return this.parseCsvFile(format, csvReader);
	}
}
/**
 * Parses CSV bytes as UTF-8, without BOM handling.
 *
 * @param format CSV format
 * @param csv CSV content
 * @return the parsed records
 * @throws UtilsException if parsing or mapping fails
 */
public ParserResult parseCsvFile(Format format, byte[] csv) throws UtilsException{
	final boolean bomHandling = false;
	return parseCsvFile(format, csv, CharEncoding.UTF_8, bomHandling);
}
/**
 * Parses CSV bytes with the given charset, without BOM handling.
 *
 * @param format CSV format
 * @param csv CSV content
 * @param charset name of the charset used to decode the bytes
 * @return the parsed records
 * @throws UtilsException if parsing or mapping fails
 */
public ParserResult parseCsvFile(Format format, byte[] csv, String charset) throws UtilsException{
	final boolean bomHandling = false;
	return parseCsvFile(format, csv, charset, bomHandling);
}
/**
 * Parses CSV bytes as UTF-8, optionally wrapping them in a BOM-aware stream.
 *
 * @param format CSV format
 * @param csv CSV content
 * @param enableBomInputStream true to read through a BOMInputStream
 * @return the parsed records
 * @throws UtilsException if parsing or mapping fails
 */
public ParserResult parseCsvFile(Format format, byte[] csv,boolean enableBomInputStream) throws UtilsException{
	final String defaultCharset = CharEncoding.UTF_8;
	return parseCsvFile(format, csv, defaultCharset, enableBomInputStream);
}
/**
 * Parses CSV bytes with the given charset, optionally wrapping them in a BOM-aware stream.
 *
 * @param format CSV format
 * @param csv CSV content
 * @param charset name of the charset used to decode the bytes
 * @param enableBomInputStream true to read through a BOMInputStream
 * @return the parsed records
 * @throws UtilsException if parsing or mapping fails
 */
public ParserResult parseCsvFile(Format format, byte[] csv, String charset,boolean enableBomInputStream) throws UtilsException{
	ByteArrayInputStream content = new ByteArrayInputStream(csv);
	try{
		return this.parseCsvFile(format, content, charset, enableBomInputStream);
	}finally{
		try{
			content.close();
		}catch(Exception ignored){
			// best-effort close
		}
	}
}
/**
 * Parses a CSV file as UTF-8, without BOM handling.
 *
 * @param format CSV format
 * @param file CSV file
 * @return the parsed records
 * @throws UtilsException if the file cannot be read or parsing/mapping fails
 */
public ParserResult parseCsvFile(Format format, File file) throws UtilsException{
	final boolean bomHandling = false;
	return parseCsvFile(format, file, CharEncoding.UTF_8, bomHandling);
}
/**
 * Parses a CSV file with the given charset, without BOM handling.
 *
 * @param format CSV format
 * @param file CSV file
 * @param charset name of the charset used to decode the file
 * @return the parsed records
 * @throws UtilsException if the file cannot be read or parsing/mapping fails
 */
public ParserResult parseCsvFile(Format format, File file, String charset) throws UtilsException{
	final boolean bomHandling = false;
	return parseCsvFile(format, file, charset, bomHandling);
}
/**
 * Parses a CSV file as UTF-8, optionally reading it through a BOM-aware stream.
 *
 * @param format CSV format
 * @param file CSV file
 * @param enableBomInputStream true to read through a BOMInputStream
 * @return the parsed records
 * @throws UtilsException if the file cannot be read or parsing/mapping fails
 */
public ParserResult parseCsvFile(Format format, File file, boolean enableBomInputStream) throws UtilsException{
	final String defaultCharset = CharEncoding.UTF_8;
	return parseCsvFile(format, file, defaultCharset, enableBomInputStream);
}
/**
 * Parses a CSV file with the given charset, optionally reading it through a BOM-aware stream.
 * The file stream is always closed before returning; close failures are ignored.
 *
 * @param format CSV format
 * @param file CSV file
 * @param charset name of the charset used to decode the file
 * @param enableBomInputStream true to read through a BOMInputStream
 * @return the parsed records
 * @throws UtilsException if the file cannot be opened or parsing/mapping fails
 */
public ParserResult parseCsvFile(Format format, File file, String charset,boolean enableBomInputStream) throws UtilsException{
	FileInputStream fileStream = null;
	try{
		fileStream = new FileInputStream(file);
		ParserResult result = this.parseCsvFile(format, fileStream, charset, enableBomInputStream);
		return result;
	}catch(Exception e){
		throw new UtilsException(e.getMessage(),e);
	}
	finally{
		if(fileStream!=null){
			try{
				fileStream.close();
			}catch(Exception ignored){
				// best-effort close
			}
		}
	}
}
/**
 * Parses a CSV stream as UTF-8, without BOM handling.
 *
 * @param format CSV format
 * @param is CSV content
 * @return the parsed records
 * @throws UtilsException if parsing or mapping fails
 */
public ParserResult parseCsvFile(Format format, InputStream is) throws UtilsException{
	final boolean bomHandling = false;
	return this.parseCsvFile(format, is, CharEncoding.UTF_8, bomHandling);
}
/**
 * Parses a CSV stream with the given charset, without BOM handling.
 *
 * @param format CSV format
 * @param is CSV content
 * @param charset name of the charset used to decode the stream
 * @return the parsed records
 * @throws UtilsException if parsing or mapping fails
 */
public ParserResult parseCsvFile(Format format, InputStream is, String charset) throws UtilsException{
	final boolean bomHandling = false;
	return this.parseCsvFile(format, is, charset, bomHandling);
}
/**
 * Parses a CSV stream as UTF-8, optionally reading it through a BOM-aware stream.
 *
 * @param format CSV format
 * @param is CSV content
 * @param enableBomInputStream true to read through a BOMInputStream
 * @return the parsed records
 * @throws UtilsException if parsing or mapping fails
 */
public ParserResult parseCsvFile(Format format, InputStream is, boolean enableBomInputStream) throws UtilsException{
	final String defaultCharset = CharEncoding.UTF_8;
	return this.parseCsvFile(format, is, defaultCharset, enableBomInputStream);
}
/**
 * Parses a CSV stream with the given charset. When <code>enableBomInputStream</code>
 * is true the stream is first wrapped in a {@link BOMInputStream}.
 * The BOM stream (if any) and the reader created here are closed before returning;
 * close failures are ignored.
 *
 * @param format CSV format
 * @param is CSV content
 * @param charset name of the charset used to decode the stream
 * @param enableBomInputStream true to read through a BOMInputStream
 * @return the parsed records
 * @throws UtilsException if the reader cannot be created or parsing/mapping fails
 */
public ParserResult parseCsvFile(Format format, InputStream is, String charset,boolean enableBomInputStream) throws UtilsException{
	BOMInputStream bomInputStream = null;
	Reader reader = null;
	try{
		InputStream source = is;
		if(enableBomInputStream){
			bomInputStream = BOMInputStream.builder()
					.setInputStream(is)
					.setCharset(charset)
					.get();
			source = bomInputStream;
		}
		reader = new InputStreamReader(source,charset);
		return this.parseCsvFile(format, reader);
	}catch(Exception e){
		throw new UtilsException(e.getMessage(),e);
	}
	finally{
		// same order as before: BOM stream first, then the reader
		if(bomInputStream!=null){
			try{
				bomInputStream.close();
			}catch(Exception ignored){
				// best-effort close
			}
		}
		if(reader!=null){
			try{
				reader.close();
			}catch(Exception ignored){
				// best-effort close
			}
		}
	}
}
/**
 * Parses the CSV content supplied by the reader and applies the configured mapping
 * to every record.
 * <p>
 * All records are loaded with {@link CSVParser#getRecords()}; records considered empty by
 * {@link #isRecordEmpty(CSVRecord)} are skipped when the format requests it. The parser
 * is closed exactly once, when processing ends (the previous implementation closed it
 * inside the per-record loop, right after the first record).
 *
 * @param format CSV format
 * @param reader source of the CSV content
 * @return the parsed records together with the CSV header map
 * @throws UtilsException if the CSV cannot be parsed or a record cannot be mapped
 */
public ParserResult parseCsvFile(Format format, Reader reader) throws UtilsException{
	CSVParser parser = null;
	try{
		parser = new CSVParser(reader, format.getCsvFormat());
		ParserResult parserResult = new ParserResult();
		parserResult.setHeaderMap(parser.getHeaderMap());
		for(CSVRecord record : parser.getRecords()){
			if(format.isSkipEmptyRecord() && isRecordEmpty(record)){
				continue;
			}
			Record recordBean = new Record();
			recordBean.setComment(record.getComment());
			recordBean.setCsvLine(record.getRecordNumber());
			recordBean.setRecord(record);
			recordBean.setMap(this.mapRecord(record));
			parserResult.getRecords().add(recordBean);
		}
		return parserResult;
	}
	catch(Exception e){
		throw new UtilsException(e.getMessage(),e);
	}finally{
		try{
			if(parser!=null){
				parser.close();
			}
		}catch(Exception eClose){
			// best-effort close
		}
	}
}

/**
 * Applies every mapping rule to a CSV record.
 *
 * @param record CSV record
 * @return the map of field name to resolved value
 * @throws Exception if a rule fails; the message is prefixed with the record line and content
 */
private MapResult mapRecord(CSVRecord record) throws Exception {
	MapResult recordMap = new MapResult();
	try{
		for (ParserMappingRecord mappingRecord : this.mapping) {
			String valore = mappingRecord.getConstantValue();
			if(valore==null){
				// not a constant: the value comes from the CSV record
				valore = this.resolveCsvValue(record, mappingRecord);
			}
			recordMap.add(mappingRecord.getName(), valore);
		}
	}catch(Exception e){
		throw new Exception("Record Line["+record.getRecordNumber()+"] ["+record+"]: "+e.getMessage(),e);
	}
	return recordMap;
}

/**
 * Reads the value of a non-constant field from the record (by position or by column name),
 * then applies regular expressions, default value and required check.
 *
 * @param record CSV record
 * @param mappingRecord mapping rule
 * @return the resolved value, possibly null
 * @throws Exception if the column does not exist, a regular expression configured with ERROR does not match, or a required value is missing
 */
private String resolveCsvValue(CSVRecord record, ParserMappingRecord mappingRecord) throws Exception {
	String tmpValue = null;
	if(mappingRecord.getCsvPosition()!=null){
		if(mappingRecord.getCsvPosition()<record.size()){
			tmpValue = record.get(mappingRecord.getCsvPosition());
		}
		else{
			throw new Exception("Record with index ["+mappingRecord.getCsvPosition()+"] is greather or equals record size ["+record.size()+"]");
		}
	}
	else{
		if(record.isMapped(mappingRecord.getCsvColumnName())){
			tmpValue = record.get(mappingRecord.getCsvColumnName());
		}
		else{
			throw new Exception("Record with column name ["+mappingRecord.getCsvColumnName()+"] not exists");
		}
	}
	if(tmpValue!=null && mappingRecord.getRegexpr()!=null){
		tmpValue = this.applyRegexps(mappingRecord, tmpValue);
	}
	if(tmpValue==null && mappingRecord.getDefaultValue()!=null){
		tmpValue = mappingRecord.getDefaultValue();
	}
	if(tmpValue==null && mappingRecord.isRequired()){
		throw new Exception("Mapping for field ["+mappingRecord.getName()+"] failed");
	}
	return tmpValue;
}

/**
 * Tries the configured regular expressions in order and returns the first non-null match.
 * When none matches, the not-found behaviour of the rule decides: ERROR raises an exception,
 * NULL returns null, any other setting (or none) returns the original value.
 *
 * @param mappingRecord mapping rule holding the patterns
 * @param csvValue value read from the CSV record
 * @return the extracted value, the original value or null
 * @throws Exception if nothing matches and the behaviour is ERROR, or the engine fails
 */
private String applyRegexps(ParserMappingRecord mappingRecord, String csvValue) throws Exception {
	String [] patterns = mappingRecord.getRegexpr();
	for (int j = 0; j < patterns.length; j++) {
		try{
			String regExpValue = RegularExpressionEngine.getStringMatchPattern(csvValue, patterns[j]);
			if(regExpValue!=null){
				return regExpValue;
			}
		}catch(RegExpNotFoundException notFound){
			// this pattern does not match: try the next one
		}
	}
	if(ParserRegexpNotFound.ERROR.equals(mappingRecord.getRegexpNotFoundBehaviour())){
		throw new Exception("Mapping for field ["+mappingRecord.getName()+"] failed, regular expression not match (value ["+csvValue+"])");
	}
	if(ParserRegexpNotFound.NULL.equals(mappingRecord.getRegexpNotFoundBehaviour())){
		return null;
	}
	// ORIGINAL (or no behaviour configured): keep the value as read
	return csvValue;
}
/**
 * Tells whether every value of the record is null or blank after trimming.
 *
 * @param record CSV record to inspect
 * @return true if no value contains non-whitespace text (also for a record without values)
 */
public static boolean isRecordEmpty(CSVRecord record){
	int position = 0;
	while (position < record.size()) {
		String cell = record.get(position);
		if(cell!=null && !"".equals(cell.trim())){
			return false;
		}
		position++;
	}
	return true;
}
}