(** Most lowercase identifiers are perfectly fine as far as Macaque is
    concerned, but some may fail at "sql query generation time"
    because they turn out to be SQL reserved keywords. This runtime
    failure has been reported by Vincent Valat.

    We keep a list of SQL keywords here; an identifier that collides
    with a keyword reserved by PostgreSQL is "escaped" into a quoted,
    non-reserved identifier.

    Macaque currently only supports PostgreSQL, so it would make sense
    to care about PostgreSQL reserved keywords only, but this will
    hopefully change someday in the future, so the table also records
    whether each keyword is reserved in the 2003 SQL standard. *)

(** Where a keyword is reserved. *)
type reserved_keyword_status = {
  reserved_in_sql2003 : bool;
  reserved_in_postgresql : bool;
}

(** Association list from an uppercase keyword to its reservation
    status.

    The list is built when the module is initialised; a keyword that is
    not written in uppercase makes the construction raise [Failure],
    because lookups in [keyword_safe] are done on the uppercased
    identifier. *)
let reserved_keywords =
  (* Build one entry, checking that the keyword is spelled in uppercase. *)
  let keyword pgsql sql03 name =
    if name <> String.uppercase_ascii name then
      failwith
        (Printf.sprintf "Reserved keyword %S should be uppercase" name);
    (name, { reserved_in_sql2003 = sql03; reserved_in_postgresql = pgsql })
  in
  (* [r] declares a single keyword, [l] a run of keywords that share
     the same status. *)
  let r pgsql sql03 name = [ keyword pgsql sql03 name ] in
  let l pgsql sql03 names = List.map (keyword pgsql sql03) names in
  (* http://www.postgresql.org/docs/8.3/static/sql-keywords-appendix.html *)
  (* Argument order below: pgSQL, then SQL03. *)
  List.concat [
    r false true "ABS";
    r true true "ALL";
    r false true "ALLOCATE";
    r false true "ALTER";
    l true false ["ANALYZE"; "ANALYSE"];
    l true true ["AND"; "ANY"];
    r false true "ARE";
    l true true ["ARRAY"; "AS"];
    r true false "ASC";
    r false true "ASENSITIVE";
    r true false "ASYMMETRIC";
    l false true ["AT"; "ATOMIC"];
    r true true "AUTHORIZATION";
    l false true ["AVG"; "BEGIN"];
    r true true "BETWEEN";
    r false true "BIGINT";
    r true true "BINARY";
    l false true ["BLOB"; "BOOLEAN"];
    r true true "BOTH";
    l false true ["BY"; "CALL"; "CALLED"; "CARDINALITY"; "CASCADED"];
    l true true ["CASE"; "CAST"];
    l false true ["CEIL"; "CEILING"; "CHAR"; "CHARACTER";
                  "CHARACTER_LENGTH"; "CHAR_LENGTH"];
    r true true "CHECK";
    l false true ["CLOB"; "CLOSE"; "COALESCE"];
    r true true "COLLATE";
    r false true "COLLECT";
    r true true "COLUMN";
    l false true ["COMMIT"; "CONDITION"; "CONNECT"];
    r true true "CONSTRAINT";
    l false true ["CONVERT"; "CORR"; "CORRESPONDING"; "COUNT";
                  "COVAR_POP"; "COVAR_SAMP"];
    l true true ["CREATE"; "CROSS"];
    l false true ["CUBE"; "CUME_DIST"; "CURRENT"];
    r true true "CURRENT_DATE";
    l false true ["CURRENT_DEFAULT_TRANSFORM_GROUP"; "CURRENT_PATH"];
    l true true ["CURRENT_ROLE"; "CURRENT_TIME"; "CURRENT_TIMESTAMP"];
    r false true "CURRENT_TRANSFORM_GROUP_FOR_TYPE";
    r true true "CURRENT_USER";
    l false true ["CURSOR"; "CYCLE"; "DATE"; "DAY"; "DEALLOCATE"; "DEC";
                  "DECIMAL"; "DECLARE"];
    r true true "DEFAULT";
    r true false "DEFERRABLE";
    l false true ["DELETE"; "DENSE_RANK"; "DEREF"];
    r true false "DESC";
    l false true ["DESCRIBE"; "DETERMINISTIC"; "DISCONNECT"];
    r true true "DISTINCT";
    r true false "DO";
    l false true ["DOUBLE"; "DROP"; "DYNAMIC"; "EACH"; "ELEMENT"];
    l true true ["ELSE"; "END"];
    l false true ["END-EXEC"; "ESCAPE"; "EVERY"];
    r true true "EXCEPT";
    l false true ["EXEC"; "EXECUTE"; "EXISTS"; "EXP"; "EXTERNAL";
                  "EXTRACT"];
    r true true "FALSE";
    l false true ["FETCH"; "FILTER"; "FLOAT"; "FLOOR"];
    l true true ["FOR"; "FOREIGN"];
    r false true "FREE";
    r true false "FREEZE";
    l true true ["FROM"; "FULL"];
    l false true ["FUNCTION"; "FUSION"; "GET"; "GLOBAL"];
    l true true ["GRANT"; "GROUP"];
    r false true "GROUPING";
    r true true "HAVING";
    l false true ["HOLD"; "HOUR"; "IDENTITY"];
    r true false "ILIKE";
    r true true "IN";
    r false true "INDICATOR";
    r true false "INITIALLY";
    r true true "INNER";
    l false true ["INOUT"; "INSENSITIVE"; "INSERT"; "INT"; "INTEGER"];
    r true true "INTERSECT";
    r false true "INTERVAL";
    l true true ["INTO"; "IS"];
    r true false "ISNULL";
    r true true "JOIN";
    l false true ["LANGUAGE"; "LARGE"; "LATERAL"];
    l true true ["LEADING"; "LEFT"; "LIKE"];
    r true false "LIMIT";
    l false true ["LN"; "LOCAL"];
    l true true ["LOCALTIME"; "LOCALTIMESTAMP"];
    l false true ["LOWER"; "MATCH"; "MAX"; "MEMBER"; "MERGE"; "METHOD";
                  "MIN"; "MINUTE"; "MOD"; "MODIFIES"; "MODULE"; "MONTH";
                  "MULTISET"; "NATIONAL"];
    r true true "NATURAL";
    l false true ["NCHAR"; "NCLOB"];
    r true true "NEW";
    l false true ["NO"; "NONE"; "NORMALIZE"];
    r true true "NOT";
    r true false "NOTNULL";
    r true true "NULL";
    l false true ["NULLIF"; "NUMERIC"; "OCTET_LENGTH"; "OF"];
    l true false ["OFF"; "OFFSET"];
    l true true ["OLD"; "ON"; "ONLY"];
    r false true "OPEN";
    l true true ["OR"; "ORDER"];
    r false true "OUT";
    l true true ["OUTER"; "OVERLAPS"];
    l false true ["OVERLAY"; "PARAMETER"; "PARTITION"; "PERCENTILE_CONT";
                  "PERCENTILE_DISC"; "PERCENT_RANK"];
    r true false "PLACING";
    l false true ["POSITION"; "POWER"; "PRECISION"; "PREPARE"];
    r true true "PRIMARY";
    l false true ["PROCEDURE"; "RANGE"; "RANK"; "READS"; "REAL";
                  "RECURSIVE"; "REF"];
    r true true "REFERENCES";
    l false true ["REFERENCING"; "REGR_AVGX"; "REGR_AVGY"; "REGR_COUNT";
                  "REGR_INTERCEPT"; "REGR_R2"; "REGR_SLOPE"; "REGR_SXX";
                  "REGR_SXY"; "REGR_SYY"; "RELEASE"; "RESULT"; "RETURN"];
    r true false "RETURNING";
    l false true ["RETURNS"; "REVOKE"];
    r true true "RIGHT";
    l false true ["ROLLBACK"; "ROLLUP"; "ROW"; "ROWS"; "ROW_NUMBER";
                  "SCOPE"; "SCROLL"; "SEARCH"; "SECOND"];
    r true true "SELECT";
    r false true "SENSITIVE";
    r true true "SESSION_USER";
    r false true "SET";
    r true true "SIMILAR";
    r false true "SMALLINT";
    r true true "SOME";
    l false true ["SPECIFIC"; "SPECIFICTYPE"; "SQL"; "SQLEXCEPTION";
                  "SQLSTATE"; "SQLWARNING"; "SQRT"; "START"; "STATIC";
                  "STDDEV_POP"; "STDDEV_SAMP"; "SUBMULTISET"; "SUBSTRING";
                  "SUM"];
    r true true "SYMMETRIC";
    l false true ["SYSTEM"; "SYSTEM_USER"];
    r true true "TABLE";
    r false true "TABLESAMPLE";
    r true true "THEN";
    l false true ["TIME"; "TIMESTAMP"; "TIMEZONE_HOUR"; "TIMEZONE_MINUTE"];
    l true true ["TO"; "TRAILING"];
    l false true ["TRANSLATE"; "TRANSLATION"; "TREAT"; "TRIGGER"; "TRIM"];
    r true true "TRUE";
    r false true "UESCAPE";
    l true true ["UNION"; "UNIQUE"];
    l false true ["UNKNOWN"; "UNNEST"; "UPDATE"; "UPPER"];
    l true true ["USER"; "USING"];
    l false true ["VALUE"; "VALUES"; "VARCHAR"; "VARYING"; "VAR_POP";
                  "VAR_SAMP"];
    r true false "VERBOSE";
    r true true "WHEN";
    r false true "WHENEVER";
    r true true "WHERE";
    l false true ["WIDTH_BUCKET"; "WINDOW"];
    r true true "WITH";
    l false true ["WITHIN"; "WITHOUT"; "XML"; "XMLAGG"; "XMLATTRIBUTES";
                  "XMLBINARY"; "XMLCOMMENT"; "XMLCONCAT"; "XMLELEMENT";
                  "XMLFOREST"; "XMLNAMESPACES"; "XMLPARSE"; "XMLPI";
                  "XMLROOT"; "XMLSERIALIZE"; "YEAR"];
  ]

(** SQL compatibility warning:

    We are going to "quote" identifiers that correspond to reserved
    keywords, so that the query still stays syntactically correct. An
    issue with automatic quoting is that quoted identifiers, besides
    being allowed to contain reserved words, are taken in
    a case-sensitive manner while the rest of SQL is case-insensitive,
    in the sense that unquoted identifiers are implicitly normalized by
    the SQL server.

    Funny problems may arise from this: if you define a table as tAbLe,
    it will internally be defined as TABLE (if normalized to uppercase)
    on the server side, and requesting the table "tAbLe" will then fail
    with a "table not found" error.

    Our choice is therefore to case-normalize reserved identifiers
    before quoting them.

    Finally, PostgreSQL does not follow the SQL norm of normalizing
    identifiers to uppercase; it instead normalizes to lowercase. As
    long as Macaque is pgsql-only, we choose lowercase here, but that
    will have to be runtime-configurable in a hopeful future where
    Macaque gets ported to other backends. *)
let normalize_keyword_case = String.lowercase_ascii

(** It is rather awkward to protect SQL identifiers here, at the
    parser level. It would make more sense to preserve the user-input
    identifier as far as possible, that is up to the SQL query
    generation. However, this would require sharing the keyword-base
    code between the Camlp4 extension (which needs to have it to
    generate the warnings) and the output code, which is rather
    awkward to do with the .cmo loading scheme used for Camlp4
    extensions. Doing everything in the extension is just more
    convenient.

    [keyword_safe identifier] returns [identifier] unchanged unless,
    compared case-insensitively, it is a keyword reserved by
    PostgreSQL; in that case the result is the case-normalized
    identifier wrapped in double quotes. Keywords reserved only by the
    SQL:2003 standard are returned unchanged. *)
let keyword_safe identifier =
  let kw = String.uppercase_ascii identifier in
  (* A single lookup: [None] means the identifier is not a keyword. *)
  let status =
    try Some (List.assoc kw reserved_keywords) with Not_found -> None
  in
  match status with
  | Some { reserved_in_postgresql = true; _ } ->
    (* Note that the %S format is used here to quote an identifier,
       not a string literal. SQL string literal escaping conventions
       are different and handled by the "escape_string" function in
       sql_printers.ml *)
    Printf.sprintf "%S" (normalize_keyword_case identifier)
  | Some _ | None -> identifier