Commit b7b1506e authored by Martin Pépin
Browse files

First version

parents
No related merge requests found
Showing with 213 additions and 0 deletions
+213 -0
.gitignore 0 → 100644
data/
test/
_build/
*.byte
*.mllib
*.mldylib
*.odocl
setup.*
myocamlbuild.ml
Makefile 0 → 100644
# Every target here is a command name, not a file, so all of them (including
# `clean`) are declared phony; otherwise a stray file called `clean` would
# silently disable the target.
.PHONY: all configure doc clean

# Default target: configure, then build through setup.ml.
all: configure
	ocaml setup.ml -build

# `doc` and `configure` run the setup.ml action named after the target ($@).
doc configure: setup.ml
	ocaml setup.ml -$@

# setup.ml is produced by `oasis setup`.
setup.ml:
	oasis setup

# Remove build products through setup.ml.
clean:
	ocaml setup.ml -clean
README.md 0 → 100644
# Eurostat Converter
A library to deal with Eurostat databases
_oasis 0 → 100644
OASISFormat: 0.4
Name: EurostatConverter
Version: 0.1
Synopsis: A library to deal with Eurostat databases
Authors: Martin Pépin
License: CC0
Library eurostat
Path: src/
BuildTools: ocamlbuild
Modules: Eurostat
BuildDepends: csv, str
Executable eurostat_converter
Path: src/
MainIs: main.ml
BuildDepends: csv, str
BuildTools: ocamlbuild
CompiledObject: best
Install: true
Document API
Title: API reference for Eurostat
Type: OCamlbuild (0.4)
InstallDir: $docdir/api
BuildTools: ocamldoc, ocamlbuild
XOCamlbuildPath: .
XOCamlbuildLibraries: eurostat
_tags 0 → 100644
# OASIS_START
# DO NOT EDIT (digest: 53dfa658da2f2fe020eb13f56bf8776d)
# Ignore VCS directories, you can use the same kind of rule outside
# OASIS_START/STOP if you want to exclude directories that contains
# useless stuff for the build process
true: annot, bin_annot
<**/.svn>: -traverse
<**/.svn>: not_hygienic
".bzr": -traverse
".bzr": not_hygienic
".hg": -traverse
".hg": not_hygienic
".git": -traverse
".git": not_hygienic
"_darcs": -traverse
"_darcs": not_hygienic
# Library eurostat
"src/eurostat.cmxs": use_eurostat
# Executable eurostat_converter
<src/main.{native,byte}>: pkg_csv
<src/main.{native,byte}>: pkg_str
<src/*.ml{,i,y}>: pkg_csv
<src/*.ml{,i,y}>: pkg_str
# OASIS_STOP
true: warn(A)
(* A database: a name, the labels of its key columns, and a mutable table
   binding each key (a list of strings) to its value. *)
type t = {
name : string ;
(* Name given at creation by [empty]; returned by [get_name]. *)
labels : string list ;
(* Labels given at creation; [to_csv_channel] writes them as the leading cells
   of the header row. *)
content : (string list, string) Hashtbl.t
(* Bindings inserted by [add_value]. *)
}
(* [empty name labels] builds a database called [name] with key labels
   [labels] and a freshly created, empty content table. *)
let empty name labels =
  let content = Hashtbl.create 17 in
  { name; labels; content }
(* [add_value db key value] binds [key] to [value] in the content table of
   [db]. It goes through [Hashtbl.add], so an existing binding of [key] is not
   removed. *)
let add_value db key value =
  let table = db.content in
  Hashtbl.add table key value
(* [get_name db] is the name stored in [db]. *)
let get_name { name; _ } = name
(** {2 Input} *)
(* [parse_header cells] splits the header row of an input file.
   The first cell holds the key labels, separated by a comma or a backslash;
   the remaining cells are the headers of the value columns.
   Returns the pair (labels, column headers).
   Raises [Failure] when the row has fewer than two cells: that is malformed
   input, not a programming error, so it is reported with a message instead of
   an assertion failure. *)
let parse_header = function
  | [] | [_] ->
    failwith
      "Eurostat.parse_header: the header row needs a label cell and at least one column header"
  | label_cell :: column_headers ->
    (Str.split (Str.regexp "[,\\]") label_cell, column_headers)
(* Separator between the fields of a row key. Compiled once here instead of
   once per parsed row. *)
let key_separator = Str.regexp ","

(* [parse_row db h_vals cells] stores one data row into [db].
   The first cell is the comma-separated row key; each remaining cell is the
   value for the column header at the same position in [h_vals]. A value is
   stored under the row key extended with its column header.
   Raises [Failure] when the row has fewer than two cells (malformed input).
   Raises [Invalid_argument] (from [List.iter2]) when the number of values
   differs from the number of column headers. *)
let parse_row db h_vals = function
  | [] | [_] ->
    failwith
      "Eurostat.parse_row: a data row needs a key cell and at least one value"
  | key_cell :: values ->
    let key = Str.split key_separator key_cell in
    List.iter2
      (fun h_val value -> add_value db (key @ [h_val]) value)
      h_vals
      values
(* [from_channel ic name] reads tab-separated rows from [ic]: the first row is
   handed to [parse_header], every following row to [parse_row]. The result is
   a database called [name] holding the parsed values. *)
let from_channel ic name =
  let rows = Csv.of_channel ~separator:'\t' ic in
  match parse_header (Csv.next rows) with
  | labels, column_headers ->
    let db = empty name labels in
    let store_row = parse_row db column_headers in
    Csv.iter ~f:store_row rows;
    db
(* [from_file ?name filename] opens [filename], reads it with [from_channel]
   and closes it again, whether reading succeeds or fails.
   The database is called [name] when it is given and not empty; otherwise it
   is called [filename] with a trailing [.tsv] removed, or [filename] itself
   when it has no such suffix.
   The suffix is tested with [Filename.check_suffix] before chopping, rather
   than relying on [Filename.chop_suffix] to raise when the suffix is absent.
   Any exception raised while opening or reading is propagated. *)
let from_file ?(name="") filename =
  let db_name =
    if name <> "" then name
    else if Filename.check_suffix filename ".tsv" then
      Filename.chop_suffix filename ".tsv"
    else filename
  in
  let ic = open_in filename in
  try
    let db = from_channel ic db_name in
    close_in ic;
    db
  with err ->
    (* Do not leak the descriptor on failure, then let the error through. *)
    close_in_noerr ic;
    raise err
(** {2 Output} *)
(* [write_sep_list oc sep items] writes the strings of [items] to [oc] with the
   character [sep] between consecutive items. There is no leading or trailing
   separator, and nothing is written for the empty list. *)
let write_sep_list oc sep items =
  List.iteri
    (fun position item ->
       if position > 0 then output_char oc sep;
       output_string oc item)
    items
(* [to_csv_channel ?sep oc db] writes [db] to [oc], one row per line, with
   cells separated by [sep] (a comma by default).
   The header row is the labels of [db] followed by its name. Each following
   row is a key followed by its value, in the order chosen by [Hashtbl.iter].
   Cells are written verbatim, without quoting. *)
let to_csv_channel ?(sep=',') oc db =
  let { name; labels; content } = db in
  (* Header row *)
  write_sep_list oc sep labels;
  Printf.fprintf oc "%c%s\n" sep name;
  (* One row per binding *)
  Hashtbl.iter
    (fun key value ->
       write_sep_list oc sep key;
       Printf.fprintf oc "%c%s" sep value;
       Printf.fprintf oc "\n")
    content
type t
(** Representation of Eurostat databases. *)
val empty : string -> string list -> t
(** [empty name labels] returns a fresh database with name [name] and a list of
labels [labels]. *)
val add_value : t -> string list -> string -> unit
(** [add_value db key value] adds the value [value] associated to the key [key]
into the database [db]. An existing binding of [key] is not removed. *)
val get_name : t -> string
(** [get_name db] returns the name of the database [db]. *)
(** {2 Input} *)
val from_channel : in_channel -> string -> t
(** [from_channel ic name] reads a Eurostat database in tab-separated format
from the input channel [ic] and returns a database with name [name]. *)
val from_file : ?name:string -> string -> t
(** [from_file ~name:"foo" filename] does the same as {!from_channel} but reads
the database from the file [filename] and names it using the optional
argument [name]. If [name] is empty or not specified, the filename (without
its [.tsv] extension) is used instead. *)
(** {2 Output} *)
val to_csv_channel : ?sep:char -> out_channel -> t -> unit
(** [to_csv_channel ~sep:';' oc db] writes the database in the csv format into
the channel [oc]. The [sep] argument can be used to specify the separator,
comma ([',']) is the default. The header row holds the labels followed by the
database name; cells are written without quoting and the order of the data
rows is unspecified. *)
src/main.ml 0 → 100644
(* Path of the input file; stays empty until an anonymous command-line
   argument is stored in it. *)
let ifile = ref ""

(* [set_str cell text] stores [text] in the reference [cell]. *)
let set_str cell text = cell := text

(* No command-line options are defined. *)
let options = []

(* Usage text passed to [Arg.parse]. *)
let usage = "usage: converter.byte file.tsv"
(* Entry point: converts the .tsv file named on the command line into a CSV
   file written to the database name followed by [_out.csv].
   Exits with status 1 when the input name does not end in [.tsv], and with
   status 0 after a successful conversion. *)
let () =
  (* Command line parsing: the anonymous argument is stored in [ifile]. *)
  Arg.parse options (set_str ifile) usage;
  if not (Filename.check_suffix !ifile ".tsv") then begin
    Printf.eprintf "The input file must have the .tsv extension\n";
    exit 1
  end;
  (* Output *)
  let db = Eurostat.from_file !ifile in
  let output = Eurostat.get_name db ^ "_out.csv" in
  let oc = open_out output in
  begin
    try
      Eurostat.to_csv_channel oc db;
      close_out oc
    with e ->
      close_out_noerr oc;
      raise e
  end;
  (* Report success only once the output channel has been flushed and closed,
     so that a failing write is never announced as a completed conversion. *)
  Printf.printf "File %s converted. Output is: %s.\n" !ifile output;
  (* Graceful exit *)
  exit 0
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment