簡易CSV to Excel - スモールデータ集計の技

今回のテーマ

今回は、たくさんあるCSVファイルをひとまずまとめてExcelに変換するパターンです。加工はExcelにしてからいくらでもできますが、CSVがたくさんあるとExcelに読み込む段階で、手作業が面倒で心が折れませんか？わたしはそんな時はこのスクリプトでとりあえずExcelにまとめて読み込ませています。
サンプルデータを示します。

お題

No.	日時	顧客名	顧客属性	来店種別	利用金額
1	2020/10/4	"住処商事"	xxxxxx	xxxx	1300
2	2020/10/25	"香料販売"	xxxxxx	xxxx	14340
3	2020/10/25	"Satoru co., Ltd"	xxxxxx	xxxx	2340
...	....	....	....	....	....

このようなCSVファイルが、”顧客利用データ”というフォルダにまとめられているケースを想定します。

スクリプト

dim oBook, fso, ExcelApp,WShell,args,Header, CurrentFolder, ifile
set fso = CreateObject("Scripting.FileSystemObject")
Set WShell = WScript.CreateObject("WScript.Shell")
set args = WScript.Arguments
Set ExcelApp = CreateObject("Excel.Application")
set Header = CreateObject("Scripting.Dictionary")
set currentfolder = fso.GetFolder(".")
Unicodeflg = false  'false=SJIS, true= UTF16
HeaderLine = 1


set args = WScript.Arguments
FolderName = args.Item(0)
set SourceFolder = fso.GetFolder( FolderName )
set oBook = ExcelApp.Workbooks.Add   '新規ワークシート
for each ifile in SourceFolder.Files
    if right( lcase(ifile.name),4) = ".csv" then
       ReadFile( ifile.path)
    end if
next
ExcelFilePath = currentfolder.path & "\" & FolderName & ".xlsx"
oBook.saveas ExcelFilePath
oBook.close
ExcelApp.Quit
WScript.Quit


Function SetHeader(oSheet, colname, colwidth, colnum )
  oSheet.Cells(1,colnum).value = colname
  oSheet.Columns(colnum).ColumnWidth  = colwidth
  Header.Add colname, colnum
  colnum = colnum + 1
  SetHeader = colnum
end function


sub ReadFile( filepath )
   dim oSheet,fileo,sourceCount, oneline, SheetName, row, line
      set oneline = CreateObject("Scripting.Dictionary")
      set schema = CreateObject("Scripting.Dictionary")
      if fso.FileExists( filepath ) then
        WScript.echo "Read file:" & filepath
        set fileo = fso.OpenTextFile(filepath , 1, false, Unicodeflg)
        set oSheet = oBook.WorkSheets.add
        sheetname = mid( filepath, InstrRev( filepath, "\" ) + 1)
        sheetname = Replace( SheetName, ".csv", "" )
        oSheet.name = sheetname
        sourceCount = 1
        do until fileo.AtEndofStream
          line = fileo.ReadLine()
          if sourceCount = HeaderLine then
             ParseSchema line, schema
             colnum = 1
             Header.Removeall
             for each key in schema.keys
               colnum = SetHeader( oSheet, key, 15, colnum )
             next
             row = 2
          else
              ParseLine line, oneline
              for each key in schema.keys
                  oSheet.Cells(row, schema.item(key)).value = oneline.item( schema.item(key))
              next
              row = row + 1
          end if
          sourceCount = sourceCount + 1
        loop
        fileo.close
        oSheet.Range(oSheet.Cells(1,1), oSheet.Cells(row, schema.count)).AutoFilter
      else
        WScript.echo "Can't file " & filename
      end if
end sub


'*****************************************************************
'*  　CSVのタグを解析する。
'*****************************************************************
Sub ParseSchema( line, schema )
  dim columns, i
  line = Replace( line, chr(34) , "" )
  schema.removeall
  columns = split(line,",", -1)
  for i = 0 to Ubound(columns)
    if Trim(columns(i)) <> "" then
      schema.add Trim(columns(i)), i+1
    end if
  next
end sub


'*****************************************************************
'*  　CSVの一行を分解する。
'*****************************************************************
Function ParseLine( line, rowData )
  dim c, cp, qp, q, l, tmp
  rowData.RemoveAll()
  ParseLine = 0
  c = 1          'column ID
  q = 0          'quote mode
  if left(line,1) = "#" then
    ParseLine = false
    exit function
  end if
  l = trim(line)
  do while len(l) > 0
    if Left(l, 1) = chr(34) then
      q = 1
      l = Mid( l, 2, len(l)-1)    '左端の"をカット
    else
      q = 0
    end if
    if q then
      qp = InStr(l, Chr(34))
      if qp = 0 then 
         WScript.echo "フォーマットエラー:" & line
         exit function
      else
         tmp = left(l, qp -1 )
         if len(trim(tmp)) = 0 then
           rowData.add c, ""
         else
           rowData.add c, tmp
         end if
         l = mid( l, qp + 1, len(l)- qp )
         cp = InStr(l, ",")
         if cp = 0 then
           ParseLine = 1
           exit function
         end if
         l = mid( l, cp + 1, len(l)- cp )
'         l = Trim(mid( l, cp + 1, len(l)- cp ))
         c = c + 1
      end if
    else
      cp = InStr(l, ",")
      if cp = 0 then
        rowData.add c, trim(l)
        ParseLine = 1
        exit function
      end if
      tmp = left(l, cp - 1)
      if len(trim(tmp)) = 0 then
        rowData.add c , ""
      else
        rowData.add c, tmp
      end if
      l = mid( l, cp + 1, len(l)- cp ) 
'      l = Trim(mid( l, cp + 1, len(l)- cp )) 
      c = c + 1
    end if
  loop
  ParseLine = 1
end Function

CSVファイルをフォルダに保存し、このスクリプトを下記で実行します。
cscript CSVtoExcel.vbs "顧客利用データ"
フォルダ名のExcelファイルが生成され、CSVファイル1ファイルあたり１シートが生成されます。

スクリプトの９行目のHeaderLineにCSVファイルのヘッダー行をセットしています。
この値を変更することで、ヘッダー行の前に説明文などがある場合に対応しています。例えばヘッダー行が３行目であれば、この値を3にしてください。
本スクリプトではヘッダー行が１行目になるように読み込みを行い、ヘッダー行に対してオートフィルターの設定をしています。
データを書き込んでいるのは59行目なので、書き込む際にフィルタを設定して条件にあったデータのみ書き込むようにしたり、必要に応じて書き換えを行うなどは、この行を変更することで可能です。

なお、本スクリプトはCSVのファイル形式がShiftJISを想定しています。Unicode16の場合は、8行目のUnicodeFlgをTrueに変更してください。
Unicode8には対応していません。Unicode８対応についても、いつか本ブログで紹介します。

*Excelは、米国Microsoft Corporationの米国およびその他の国における登録商標または商標です。